@@ -42,4 +42,4 @@ Tools/VadereAnalysisTools/VadereAnalysisTool/dist/
# Vadere output directories
**/output/
**/legacy/
**/*_private/
**/*_private/
\ No newline at end of file
I avoided Bene's problem of pedestrians piling up at the beginning of the corridor by adding walls.
With the default configuration of the OSM:
Distributions at step width 1/16 - 1 run (columns: shares for targets 230, 231, 232 and the number of frames showing that distribution):
230 231 232 frames
0 0.0 0.000000 1.000000 64
1 0.0 0.142857 0.857143 1
2 0.0 0.166667 0.833333 5
3 0.0 0.200000 0.800000 27
4 0.0 0.222222 0.777778 1
5 0.0 0.250000 0.750000 39
6 0.0 0.285714 0.714286 11
7 0.0 0.333333 0.666667 69
8 0.0 0.375000 0.625000 3
9 0.0 0.400000 0.600000 60
10 0.0 0.428571 0.571429 9
11 0.0 0.500000 0.500000 131
12 0.0 0.571429 0.428571 13
13 0.0 0.600000 0.400000 56
14 0.0 0.625000 0.375000 3
15 0.0 0.666667 0.333333 84
16 0.0 0.714286 0.285714 5
17 0.0 0.750000 0.250000 48
18 0.0 0.800000 0.200000 12
19 0.0 0.833333 0.166667 7
20 0.0 0.857143 0.142857 1
21 0.0 1.000000 0.000000 85
1. The trajectories look very similar to the originals.
With many pedestrians, i.e. at high density, the simulated pedestrians also stay on the side they came from on their way to the target.
2. However, the scenarios run considerably faster.
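To make the distribution tables reproducible, here is a minimal sketch of how such a histogram could be assembled, assuming the per-frame target fractions (shares of pedestrians heading to targets 230, 231 and 232) are available as rows of a numpy array, analogous to the three target columns used by the preprocessing scripts further down. Names and example values are illustrative.
import numpy as np
from collections import Counter

def distribution_histogram(frame_fractions):
    """Count how many frames share each (230, 231, 232) target split.

    frame_fractions: array of shape (n_frames, 3); every row sums to 1.
    Returns a list of (fraction_triple, number_of_frames), sorted by triple.
    """
    counts = Counter(tuple(np.round(row, 6)) for row in np.asarray(frame_fractions))
    return sorted(counts.items())

# illustrative usage with three frames
example = np.array([[0.0, 0.5, 0.5], [0.0, 0.5, 0.5], [0.0, 1.0, 0.0]])
for triple, n_frames in distribution_histogram(example):
    print(triple, n_frames)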
Bene's configuration:
Distributions at step width 1/16 - 3 runs (same columns as above):
230 231 232 frames
0 0.0 0.000000 1.000000 1537
1 0.0 0.142857 0.857143 2
2 0.0 0.166667 0.833333 75
3 0.0 0.200000 0.800000 271
4 0.0 0.250000 0.750000 657
5 0.0 0.285714 0.714286 85
6 0.0 0.333333 0.666667 1691
7 0.0 0.375000 0.625000 49
8 0.0 0.400000 0.600000 944
9 0.0 0.428571 0.571429 263
10 0.0 0.444444 0.555556 10
11 0.0 0.500000 0.500000 4017
12 0.0 0.555556 0.444444 10
13 0.0 0.571429 0.428571 214
14 0.0 0.600000 0.400000 967
15 0.0 0.625000 0.375000 32
16 0.0 0.666667 0.333333 1810
17 0.0 0.714286 0.285714 69
18 0.0 0.750000 0.250000 750
19 0.0 0.800000 0.200000 216
20 0.0 0.833333 0.166667 71
21 0.0 0.857143 0.142857 6
22 0.0 0.875000 0.125000 3
23 0.0 1.000000 0.000000 1814
1. Very many duplicate images.
2. The scenarios run even faster.
"OBSTACLES" -> obstacleDensityWeight between 0.0 and at most 1.0, ideally 0.1 (a small configuration sketch follows below).
Zhang 2011
Density images as in the paper, at the end.
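A small sketch of how obstacleDensityWeight could be set in a Vadere scenario file. Since the exact location of the attribute inside the scenario JSON is not spelled out here, the snippet simply walks the parsed JSON recursively and overwrites every occurrence of the key; the file name is a placeholder.
import json

def set_attribute(node, key, value):
    """Recursively overwrite every occurrence of `key` in a parsed scenario JSON."""
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                node[k] = value
            else:
                set_attribute(v, key, value)
    elif isinstance(node, list):
        for item in node:
            set_attribute(item, key, value)

# placeholder file name; adjust to the scenario used in the experiments
with open('corridor.scenario') as f:
    scenario = json.load(f)
set_attribute(scenario, 'obstacleDensityWeight', 0.1)  # recommended value from the note above
with open('corridor.scenario', 'w') as f:
    json.dump(scenario, f, indent=2)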
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool
def processRow(args):
i, row, path = args
n_targets = 3
row = np.array(row)
targets = row[-n_targets:]
data = row[:-n_targets]
name = 'density-{0}-{1:.2f}-{2:.2f}-{3:.2f}.png'.format(i, targets[0], targets[1], targets[2])
filepath = os.path.join(path, name)
if os.path.exists(filepath):
#print("Skipping", filepath, 'since it already exits!')
return False
image = data.reshape((10, 23))
fig, ax = plt.subplots()
ax.imshow(image, interpolation='Nearest')
ax.axis('off')
fig.gca().set_axis_off()
fig.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
ax.margins(0,0)
fig.gca().xaxis.set_major_locator(plt.NullLocator())
fig.gca().yaxis.set_major_locator(plt.NullLocator())
    fig.savefig(filepath, bbox_inches='tight', pad_inches=0)
    plt.close(fig)  # release the figure; worker processes may render many images
    return True
def processFile(file, base_directory, image_directory, number_of_cores=1, _filter=lambda x: x):
path = os.path.join(image_directory, file[:-4])
try:
os.mkdir(path)
    except FileExistsError:
        print("Path", path, "already exists")
    for chunk in pd.read_csv(os.path.join(base_directory, file), sep=';', header=None, chunksize=500):
chunk = _filter(chunk)
with Pool(processes=number_of_cores) as p:
with tqdm(total=len(chunk), desc='process chunk', leave=False) as pbar:
rows = list(map(lambda r: (*r, path), list(chunk.iterrows())))
for i, result in enumerate(p.imap_unordered(processRow, rows)):
pbar.update()
print('Done', file)
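A minimal usage sketch for the helper above, assuming the preprocessed CSV files contain one flattened 10x23 density heatmap per row followed by the three target fractions (which is what processRow expects); the directory names are placeholders.
if __name__ == '__main__':
    base_directory = 'data/output_preprocessed'  # placeholder: directory with the *.csv heatmap files
    image_directory = 'data/images'              # placeholder: directory for the generated PNGs
    os.makedirs(image_directory, exist_ok=True)
    for file in sorted(os.listdir(base_directory)):
        if file.endswith('.csv'):
            processFile(file, base_directory, image_directory, number_of_cores=4)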
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial as spatial
import cProfile
from tqdm import tqdm
from multiprocessing import Pool
from collections import defaultdict
from shapely.geometry import Polygon, MultiPolygon, Point
from functools import reduce
def extract_pedestrians_in_area(df, area):
""" Extract pedestrian inside the given area
"""
is_x = (df.x.values >= area[0]) & (df.x.values <= (area[0] + area[2]))
is_y = (df.y.values >= area[1]) & (df.y.values <= (area[1] + area[3]))
return df[is_x & is_y]
# Taken from
# https://stackoverflow.com/questions/23901943/voronoi-compute-exact-boundaries-of-every-region
def voronoi_polygons(voronoi, diameter):
"""Generate shapely.geometry.Polygon objects corresponding to the
regions of a scipy.spatial.Voronoi object, in the order of the
input points. The polygons for the infinite regions are large
enough that all points within a distance 'diameter' of a Voronoi
vertex are contained in one of the infinite polygons.
"""
centroid = voronoi.points.mean(axis=0)
# Mapping from (input point index, Voronoi point index) to list of
# unit vectors in the directions of the infinite ridges starting
# at the Voronoi point and neighbouring the input point.
ridge_direction = defaultdict(list)
for (p, q), rv in zip(voronoi.ridge_points, voronoi.ridge_vertices):
u, v = sorted(rv)
if u == -1:
# Infinite ridge starting at ridge point with index v,
# equidistant from input points with indexes p and q.
t = voronoi.points[q] - voronoi.points[p] # tangent
n = np.array([-t[1], t[0]]) / np.linalg.norm(t) # normal
midpoint = voronoi.points[[p, q]].mean(axis=0)
direction = np.sign(np.dot(midpoint - centroid, n)) * n
ridge_direction[p, v].append(direction)
ridge_direction[q, v].append(direction)
for i, r in enumerate(voronoi.point_region):
region = voronoi.regions[r]
if -1 not in region:
# Finite region.
yield Polygon(voronoi.vertices[region]), voronoi.points[i]
continue
# Infinite region.
inf = region.index(-1) # Index of vertex at infinity.
j = region[(inf - 1) % len(region)] # Index of previous vertex.
k = region[(inf + 1) % len(region)] # Index of next vertex.
if j == k:
# Region has one Voronoi vertex with two ridges.
dir_j, dir_k = ridge_direction[i, j]
else:
# Region has two Voronoi vertices, each with one ridge.
dir_j, = ridge_direction[i, j]
dir_k, = ridge_direction[i, k]
# Length of ridges needed for the extra edge to lie at least
# 'diameter' away from all Voronoi vertices.
length = 2 * diameter / np.linalg.norm(dir_j + dir_k)
# Polygon consists of finite part plus an extra edge.
finite_part = voronoi.vertices[region[inf + 1:] + region[:inf]]
extra_edge = [voronoi.vertices[j] + dir_j * length,
voronoi.vertices[k] + dir_k * length]
yield Polygon(np.concatenate((finite_part, extra_edge))), voronoi.points[i]
def process_timestep(args):
""" Process given timestep
"""
_, trajectories, context = args
# get pedestrians inside area
area = context.get('area')
pedestrians = extract_pedestrians_in_area(trajectories, area)
hasVelocity = 'velocity' in pedestrians.columns
# skip timesteps with less than 4 pedestrians
if len(pedestrians) < 4:
return None
boundary = context.get('boundary')
positions = list(zip(pedestrians.x.values, pedestrians.y.values))
# generate voronoi diagram
voronoi = spatial.Voronoi(positions)
xdim = context.get('xdim')
ydim = context.get('ydim')
coordinates = context.get('coordinates')
D = np.zeros((ydim * xdim, 1))
V = np.zeros((ydim * xdim, 1))
diameter = context.get('diameter')
polygons = []
# convert infinite voronoi cell to finite
# and cut out area inside boundaries
for p, pos in voronoi_polygons(voronoi, diameter):
intersection = p.intersection(boundary)
        if isinstance(intersection, MultiPolygon):
            # iterate over the parts explicitly; indexing a MultiPolygon is not supported in newer shapely versions
            for part in intersection.geoms:
                polygons.append((part, 1 / part.area, pos))
        elif isinstance(intersection, Polygon):
            polygons.append((intersection, 1 / intersection.area, pos))
number_of_polygons = len(polygons)
interval = range(int(number_of_polygons / 2) + 2)
for index, point in coordinates:
p = Point(point)
# skip points outside of scenario
if not boundary.contains(p):
continue
# search for polygon containing current grid point
for i in interval:
if polygons[i][0].contains(p):
D[index] = polygons[i][1]
if hasVelocity:
V[index] = pedestrians[(pedestrians.x.values == polygons[i][2][0]) & (pedestrians.y.values == polygons[i][2][1])].velocity.values[0]
break
end = number_of_polygons - 1 - i
if polygons[end][0].contains(p):
D[index] = polygons[end][1]
if hasVelocity:
V[index] = pedestrians[(pedestrians.x.values == polygons[end][2][0]) & (pedestrians.y.values == polygons[end][2][1])].velocity.values[0]
break
return D, V
def sum(x1, x2):
return x1 + x2
def run(df, context, nprocesses=3):
""" Process experiment
"""
# create coordinate grid
area = context.get('area')
resolution = context.get('resolution', 0.1)
x = np.arange(area[0], area[0] + area[2], resolution)
y = np.arange(area[1], area[1] + area[3], resolution)
xdim = len(x)
ydim = len(y)
xx, yy = np.meshgrid(x, y)
coordinates = list(zip(xx.ravel(), yy.ravel()))
context['xdim'] = xdim
context['ydim'] = ydim
    context['coordinates'] = list(enumerate(coordinates))  # materialize the index/point pairs so every pickled copy of the context holds the full grid
# calculate radius of voronoi calculation
boundary = context.get('boundary')
bb = np.array(boundary.boundary.coords)
diameter = np.linalg.norm(bb.ptp(axis=0))
context['diameter'] = diameter
skip = context.get('skip', 1)
# group trajectories by timestep
grouped = list(df.groupby('timeStep'))
# take every n-th frame
timesteps = list(grouped)[::skip]
# extend data with context
timesteps = list(map(lambda a: (*a, context), timesteps))
total = len(timesteps)
densities = []
velocities = []
with Pool(processes=nprocesses) as p:
with tqdm(total=total, desc=context.get('name')) as pbar:
# process timesteps in parallel
for _, result in enumerate(p.imap_unordered(process_timestep, timesteps)):
pbar.update()
# append heatmaps for timestep to collection
if result is not None:
D, V = result
densities.append(D)
velocities.append(V)
# sum up all heatmaps and normalize
density = reduce(sum, densities) / len(densities)
velocity = reduce(sum, velocities) / len(velocities)
# flip and reshape data
return np.flipud(density.reshape((ydim, xdim))), np.flipud(velocity.reshape((ydim, xdim)))
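A hedged usage sketch for the Voronoi density module above (meant to run alongside it, so that run() is in scope). The trajectory DataFrame must offer at least timeStep, x and y columns (velocity is optional), and the context needs the observation area plus a shapely polygon of the walkable domain; the file name, column layout and all numbers are illustrative.
import pandas as pd
from shapely.geometry import Polygon

# illustrative trajectory file; adapt path, separator and column names to the actual Vadere output
df = pd.read_csv('postvis.trajectories', sep=r'\s+')
context = {
    'name': 'corridor example',
    'area': [20.0, 10.0, 10.0, 10.0],  # x, y, width, height of the observation area
    'boundary': Polygon([(0, 0), (50, 0), (50, 30), (0, 30)]),  # walkable domain
    'resolution': 0.1,                 # grid spacing of the heatmap
    'skip': 10,                        # use every 10th time step
}
density, velocity = run(df, context, nprocesses=4)
plt.imshow(density)
plt.colorbar()
plt.show()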
from density.gaussian import *
from density.pedestrian import *
import math
import numpy as np
import utils.writer.density as writer
INDEX_TIME_STEP = 0
INDEX_PED_ID = 1
INDEX_POS_X = 2
INDEX_POS_Y = 3
INDEX_TARGET_ID = 4
# ----------------------------------------------------------------------------------------------------------------------
# Pedestrian count density
# ----------------------------------------------------------------------------------------------------------------------
# Counts pedestrians per grid cell for every time step and writes one density matrix per time step to file.
# @param data               pedestrian data per time step, chronologically sorted
# @param observation_area   [x, y, width, height] of the measurement field
# @param resolution         edge length of one grid cell
def calculate_pedestrian_density(data, observation_area, resolution, output_root_directory, output_file_name, count):
size_matrix = (int(observation_area[2] / resolution), int(observation_area[3] / resolution))
    for timestep in data:  # iterate over time steps
matrix = np.zeros(size_matrix) # new matrix for new time step
for ped in timestep:
add_pedestrian(ped, matrix, observation_area, resolution)
writer.write_matrix_to_file(matrix, output_root_directory, output_file_name, timestep[0][0], count) # write matrix directly to file
# add the density contribution of one pedestrian depending on its current position
def add_pedestrian(ped, matrix, observation_area, resolution):
    x_pos = np.round((ped[INDEX_POS_X] - observation_area[0]) / resolution, 1)  # round to 1 digit after the decimal point
y_pos = np.round((ped[INDEX_POS_Y] - observation_area[1])/ resolution, 1)
x_pos_frac, x_pos_int = math.modf(x_pos)
y_pos_frac, y_pos_int = math.modf(y_pos)
x_pos_int = int(x_pos_int)
y_pos_int = int(y_pos_int)
h = matrix.shape[1]
pedVal = 1
positions = []
    if x_pos_frac != 0 and y_pos_frac != 0:  # ped stands entirely within one field
positions.append([x_pos_int, y_pos_int])
else: # border cases
        positions.append([x_pos_int, y_pos_int])  # at least the current field must be added
if x_pos_frac == 0: # ped standing between two fields on x-axis
if y_pos_frac == 0: # ped standing between two fields on y-axis
# check if coordinates are valid
if x_pos_int > 0:
positions.append([x_pos_int - 1, y_pos_int]) # field to the left of ped
if y_pos_int > 0:
positions.append([x_pos_int, y_pos_int - 1]) # field below ped
positions.append([x_pos_int - 1, y_pos_int - 1]) # field diagonal from ped
            else:  # ped stands only between two fields on the x-axis
if x_pos_int > 0:
positions.append([x_pos_int - 1, y_pos_int])
        else:  # ped stands only between two fields on the y-axis
if y_pos_int > 0:
positions.append([x_pos_int, y_pos_int - 1])
for pos in positions:
matrix[h - pos[1] - 1][pos[0]] = pedVal / len(positions)
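To illustrate how add_pedestrian splits a pedestrian that stands exactly on a cell border between neighbouring fields, here is a small self-contained check appended to the module above; the observation area, resolution and coordinates are made up.
if __name__ == '__main__':
    observation_area = [20.0, 10.0, 10.0, 10.0]  # illustrative x, y, width, height
    resolution = 0.5
    matrix = np.zeros((int(observation_area[2] / resolution), int(observation_area[3] / resolution)))
    # pedestrian standing exactly on a vertical cell border: its weight is split over two fields
    ped = [0, 1, 22.0, 12.3, 2]  # timeStep, pedId, x, y, targetId
    add_pedestrian(ped, matrix, observation_area, resolution)
    print(matrix.sum())  # 1.0: the pedestrian's total contribution is conserved across the two fields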
from filter.binarization import *
import cv2
import numpy as np
from matplotlib import pyplot as plt
# load the image in grayscale; add a picture to the working directory or give a full path
img = cv2.imread(r'C:\Users\julia\OneDrive\Bilder\Saved Pictures\pexels-photo-266174.jpeg', 0)
ret,thresh1 = cv2.threshold(img,127,255,cv2.THRESH_BINARY)
ret,thresh2 = cv2.threshold(img,127,255,cv2.THRESH_BINARY_INV)
ret,thresh3 = cv2.threshold(img,127,255,cv2.THRESH_TRUNC)
ret,thresh4 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO)
ret,thresh5 = cv2.threshold(img,127,255,cv2.THRESH_TOZERO_INV)
titles = ['Original Image','BINARY','BINARY_INV','TRUNC','TOZERO','TOZERO_INV']
images = [img, thresh1, thresh2, thresh3, thresh4, thresh5]
for i in np.arange(6):
plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')
plt.title(titles[i])
plt.xticks([]),plt.yticks([])
plt.show()
import time
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from rf.error_calculation import calc_and_print_errors
def load_data(directory, log_file):
imported_files = os.listdir(directory)
startTime = time.time()
# import data
data_frame_density = pd.DataFrame()
for ifile in range(0, len(imported_files)):
file = imported_files[ifile]
if file.endswith(".csv"):
# read in data
tmp_density = pd.read_csv(directory + file, header=None, sep=';')
data_frame_density = data_frame_density.append(tmp_density)
if np.mod(ifile, 10) == 0:
print("Reading files: ", round(ifile / len(imported_files) * 100), "%")
# split data in samples and response
y_density = data_frame_density[data_frame_density.columns[-3:]]
x_density = data_frame_density.iloc[:, 0:-3]
return x_density, y_density
def apply_heuristic(directory, log_file, test_size_percent,obs_area, resolution):
x_density, y_density = load_data(directory, log_file)
# split data into training and test set of data
x_train_density, x_test_density, y_train_density, y_test_density = train_test_split(x_density, y_density,
random_state=1,
test_size=test_size_percent)
# apply heuristic to each matrix in the test set
    # use plain numpy arrays; the private _get_values accessor is not part of the pandas API
    x_test_density_np = x_test_density.values
    y_test_density_np = y_test_density.values
n_heatmaps = x_test_density_np.shape[0]
dim_heatmaps_x = int(obs_area[2]/resolution)
dim_heatmaps_y = int(obs_area[3]/resolution)
error_euklid_all = -1*np.ones([n_heatmaps,1])
estimate3_all = -1*np.ones([n_heatmaps,3])
    for i in range(n_heatmaps):  # iterate over all test heatmaps
snapshot = np.reshape(x_test_density_np[i], [dim_heatmaps_y, dim_heatmaps_x])
# # Method 1
# part_left = int(np.round(dim_heatmaps_x/3))
# part_straight = int(np.round(dim_heatmaps_x/3))
# part_right = int(dim_heatmaps_x - part_straight-part_left)
#
# vals_left = snapshot[:,0:part_left]
# vals_straight = snapshot[:,part_left:part_left+part_straight]
# vals_right = snapshot[:,part_left+part_straight:dim_heatmaps_x]
#
# total = np.sum(vals_left)*part_left + np.sum(vals_straight)*part_straight + np.sum(vals_right)*part_right
# part_left = np.sum(vals_left)*part_left / total
# part_straight = np.sum(vals_straight)*part_straight / total
# part_right = np.sum(vals_right)*part_right / total
#
# print('First estimate: [%d %d %d]' % (part_left*100, part_straight*100, part_right*100))
#
# # Method 2
#
# part_left2 = np.sum(vals_left) / np.sum(snapshot)
# part_straight2 = np.sum(vals_straight) / np.sum(snapshot)
# part_right2 = np.sum(vals_right) / np.sum(snapshot)
#
# print('Second estimate: [%d %d %d]' % (part_left2*100, part_straight2*100, part_right2*100))
# Method #3
        if np.mod(dim_heatmaps_x, 3) == 0:  # width is a multiple of 3: three equally wide parts
            part_left = int(dim_heatmaps_x / 3)
            vals_left = snapshot[:, 0:part_left]
            vals_straight = snapshot[:, part_left:2 * part_left]
            vals_right = snapshot[:, 2 * part_left:dim_heatmaps_x]
            # with equally wide parts the weighting cancels, so plain column sums feed the estimate below
            vals_left3 = np.sum(vals_left)
            vals_straight3 = np.sum(vals_straight)
            vals_right3 = np.sum(vals_right)
else:
idx_left_straight = int(np.floor(dim_heatmaps_x / 3))
idx_straight_right = int(dim_heatmaps_x - np.ceil(dim_heatmaps_x / 3))
# whole parts
vals_left3 = np.sum(snapshot[:, 0:idx_left_straight ])
vals_straight3 = np.sum(snapshot[:, idx_left_straight+1:idx_straight_right])
vals_right3 = np.sum(snapshot[:, idx_straight_right +1 : dim_heatmaps_x])
factor = np.mod(dim_heatmaps_x,3)/3
vals_left3 = vals_left3 + np.sum(snapshot[:,idx_left_straight]) * factor
vals_straight3 = vals_straight3 + np.sum(snapshot[:,idx_left_straight]) * (1-factor) + np.sum(snapshot[:,idx_straight_right]) * (1-factor)
vals_right3 = vals_right3 + np.sum(snapshot[:,idx_straight_right]) * factor
        # make sure Method 3 conserves the total mass of the snapshot
        assert abs(vals_left3 + vals_straight3 + vals_right3 - np.sum(snapshot)) < 1e-10
estimate3 = np.array([vals_left3/np.sum(snapshot), vals_straight3/np.sum(snapshot), vals_right3/np.sum(snapshot)])
# Save results
error_euklid = np.sqrt(np.sum(np.square(y_test_density_np[i] - estimate3)))
error_euklid_all[i,:] = error_euklid
estimate3_all[i,:] = estimate3
# Error in each heatmap
# print(" ")
# print('Actual distribution: [%.4f %.4f %.4f]' % (y_test_density_np[i][0], y_test_density_np[i][1], y_test_density_np[i][2]))
# print('Estimate: [%.4f %.4f %.4f]' % ( estimate3[0], estimate3[1], estimate3[2] ))
# print(" ")
# print(" * RESULTS * \n")
# print('Mean Euclidean Error: %.4f' % error_euklid)
# print('Mean Euclidean Error: %.2f%%' % (error_euklid / np.sqrt(2) * 100))
# evaluate all
print(" * HEURISTIC RESULTS * ")
print(" (Mean over all test snapshots)")
log_file.write("\n * HEURISTIC RESULTS * \n")
calc_and_print_errors(y_test_density_np, estimate3_all, log_file)
'''
# Euclidean error
euklid_error2 = np.sqrt(np.sum(np.square(estimate3_all - y_test_density_np), 1))
euklid_error_mean2 = np.mean(euklid_error2)
euklid_error_mean_percent2 = euklid_error_mean2 / np.sqrt(2) * 100
# RMSE (per Direction)
rmse_per_dir = np.sqrt(np.mean(np.square(estimate3_all - y_test_density_np),axis=0))
## Print results
print(" ")
print("Mean Euclidean Error (Test data): %f" % euklid_error_mean2)
print("Mean Euclidean Error (Test data): %.2f%%" % euklid_error_mean_percent2)
print(" ")
print("RMSE (Test data) Per Direction: [%.4f %.4f %.4f]" % (rmse_per_dir[0], rmse_per_dir[1], rmse_per_dir[2]))
print("RMSE (Test data) Per Direction: [%.2f%% %.2f%% %.2f%%]" % (rmse_per_dir[0]*100, rmse_per_dir[1]*100, rmse_per_dir[2]*100))
print(" ")
print("Mean of Euclidean Errors: %f " % np.mean(error_euklid_all))
print("Mean of Euclidean Errors: %.2f%%" % (np.mean(error_euklid_all)/np.sqrt(2)*100))
print(" ")
print("Std of Euclidean Errors: %f " % np.std(error_euklid_all))
print("Std of Euclidean Errors: %.2f%%" % (np.std(error_euklid_all)/np.sqrt(2)*100))
# Write results to log file
log_file.write("\n * HEURISTIC RESULTS * \n")
log_file.write("RMSE (Test data) Per Direction: [%f %f %f]\n" % (rmse_per_dir[0], rmse_per_dir[1], rmse_per_dir[2]))
log_file.write("RMSE (Test data) Per Direction: [%.2f%% %.2f%% %.2f%%] \n\n" % (rmse_per_dir[0]*100, rmse_per_dir[1]*100, rmse_per_dir[2]*100))
log_file.write("Mean of Euclidean Errors: %f \n" % np.mean(error_euklid_all))
log_file.write("Mean of Euclidean Errors: %.2f%%\n" % (np.mean(error_euklid_all)/np.sqrt(2)*100))
log_file.write("Std of Euclidean Errors: %f \n" % np.std(error_euklid_all))
log_file.write("Std of Euclidean Errors: %.2f%%\n" % (np.std(error_euklid_all)/np.sqrt(2)*100))
print(" ")
'''
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
batch_size = 128
num_classes = 10
epochs = 12
# input image dimensions
img_rows, img_cols = 28, 28
# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
if K.image_data_format() == 'channels_first':
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
input_shape = (1, img_rows, img_cols)
else:
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
activation='relu',
input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
optimizer=keras.optimizers.Adadelta(),
metrics=['accuracy'])
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
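The script above is the stock Keras MNIST example. A hedged sketch of how the same architecture could be redirected at the 10x23 density heatmaps generated earlier, with the three target fractions as output instead of ten digit classes, might look as follows; it reuses the imports from the MNIST script, and loading x_train/y_train from the preprocessed CSVs is assumed to mirror the random-forest scripts below and is not shown.
# assumed input: x_train of shape (n_samples, 10, 23, 1) built from the flattened heatmaps,
# y_train of shape (n_samples, 3) with the target fractions (each row sums to 1)
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(10, 23, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))  # three target fractions instead of ten digit classes
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adadelta(),
              metrics=['mae'])
# model.fit(x_train, y_train, batch_size=128, epochs=12, validation_split=0.2)
Mean squared error on the softmax output would be an equally defensible loss for this regression-style target.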
import numpy as np
def calc_and_print_errors_rf(y_test_density, y_predicted_normiert, log_file, score_training, score_test, score_oob):
log_file.write(
"Accuracy on training set (score): [%f %f %f]\n" % (score_training[0], score_training[1], score_training[2]))
log_file.write("Accuracy on test set (score): [%f %f %f] \n" % (score_test[0], score_test[1], score_test[2]))
log_file.write("Out of bag error (score): [%f %f %f] \n\n" % (score_oob[0], score_oob[1], score_oob[2]))
calc_and_print_errors(y_test_density, y_predicted_normiert, log_file)
def calc_and_print_errors(y_test_density, y_predicted_normiert, log_file):
### Calc error of prediction
# print importance of features
# print("Importance of features")
# print(rf_density_regressor.feature_importances_)
# Euclidean error over all forests and samples
euklid_error = np.sqrt(np.sum(np.square(y_predicted_normiert - y_test_density), 1))
euklid_error_mean = np.mean(euklid_error)
euklid_error_std = np.std(euklid_error)
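    # the largest possible Euclidean distance between two target distributions is sqrt(2)
    # (e.g. (1, 0, 0) vs. (0, 1, 0)), so dividing by sqrt(2) rescales the error to the range [0, 1]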
euklid_error_mean_percent = euklid_error_mean / np.sqrt(2) * 100
# Daniel: Mean absolute error
mean_abs_error = np.mean(np.abs(y_predicted_normiert - y_test_density), axis=0)
mean_abs_error_percent = mean_abs_error * 100
mean_error = np.mean(y_predicted_normiert - y_test_density, axis=0)
# RMSE (per Tree)
rmse_per_tree = np.sqrt(np.mean(np.square(y_predicted_normiert - y_test_density), axis=0))
### Print to console
if len(rmse_per_tree) == 3:
print("RMSE (per direction): [%f %f %f]" % (rmse_per_tree[0], rmse_per_tree[1], rmse_per_tree[2]))
print("RMSE (per direction): [%.2f%% %.2f%% %.2f%%]\n" % (rmse_per_tree[0] * 100, rmse_per_tree[1] * 100, rmse_per_tree[2] * 100))
elif len(rmse_per_tree) == 2:
print("RMSE (per direction): [%f %f]" % (rmse_per_tree[0], rmse_per_tree[1]))
print("RMSE (per direction): [%.2f%% %.2f%%]\n" % (rmse_per_tree[0] * 100, rmse_per_tree[1] * 100))
print("Mean Euclidean Error: %f" % euklid_error_mean)
print("Mean Euclidean Error: %.2f%%" % euklid_error_mean_percent)
print("Std Euclidean Error: %f " % euklid_error_std)
print("Std Euclidean Error: %.2f%% \n" % (euklid_error_std / np.sqrt(2) * 100))
if len(rmse_per_tree) == 3:
print("Mean Absolute Error (per direction): [%f %f %f] " % (mean_abs_error[0], mean_abs_error[1], mean_abs_error[2]))
print("Mean Absolute Error (per direction): [%.2f%% %.2f%% %.2f%%]" % (mean_abs_error_percent[0], mean_abs_error_percent[1], mean_abs_error_percent[2]))
print("Mean Error (per direction): [%f %f %f]" % (mean_error[0], mean_error[1], mean_error[2]))
elif len(rmse_per_tree) == 2:
print("Mean Absolute Error (per direction): [%f %f] " % (mean_abs_error[0], mean_abs_error[1]))
print("Mean Absolute Error (per direction): [%.2f%% %.2f%%]" % (mean_abs_error_percent[0], mean_abs_error_percent[1]))
print("Mean Error (per direction): [%f %f]" % (mean_error[0], mean_error[1]))
###### Write results to file
log_file.write(" * Errors on test data set: \n\n")
if len(rmse_per_tree) == 3:
log_file.write("RMSE (per direction): [%f %f %f] \n" % (rmse_per_tree[0], rmse_per_tree[1], rmse_per_tree[2]))
log_file.write("RMSE (per direction): [%.2f%% %.2f%% %.2f%%] \n\n" % (rmse_per_tree[0] * 100, rmse_per_tree[1] * 100, rmse_per_tree[2] * 100))
log_file.write("Mean Euclidean Error: %f \n" % euklid_error_mean)
log_file.write("Mean Euclidean Error: %.2f%% \n" % euklid_error_mean_percent)
log_file.write("Std Euclidean Error: %f \n" % euklid_error_std)
log_file.write("Std Euclidean Error: %.2f%% \n\n" % (euklid_error_std / np.sqrt(2) * 100))
log_file.write("Mean Absolute Error (per forest): [%f %f %f] \n" % (mean_abs_error[0], mean_abs_error[1], mean_abs_error[2]))
log_file.write("Mean Absolute Error (per direction): [%.2f%% %.2f%% %.2f%%] \n" % (mean_abs_error_percent[0], mean_abs_error_percent[1], mean_abs_error_percent[2]))
log_file.write("Mean Error (per direction): [%f %f %f] \n" % (mean_error[0], mean_error[1], mean_error[2]))
elif len(rmse_per_tree) == 2:
log_file.write("RMSE (per direction): [%f %f] \n" % (rmse_per_tree[0], rmse_per_tree[1]))
log_file.write("RMSE (per direction): [%.2f%% %.2f%%] \n\n" % (rmse_per_tree[0] * 100, rmse_per_tree[1] * 100))
log_file.write("Mean Euclidean Error: %f \n" % euklid_error_mean)
log_file.write("Mean Euclidean Error: %.2f%% \n" % euklid_error_mean_percent)
log_file.write("Std Euclidean Error: %f \n" % euklid_error_std)
log_file.write("Std Euclidean Error: %.2f%% \n\n" % (euklid_error_std / np.sqrt(2) * 100))
log_file.write("Mean Absolute Error (per forest): [%f %f] \n" % (mean_abs_error[0], mean_abs_error[1]))
log_file.write("Mean Absolute Error (per direction): [%.2f%% %.2f%%] \n" % (mean_abs_error_percent[0], mean_abs_error_percent[1]))
log_file.write("Mean Error (per direction): [%f %f] \n" % (mean_error[0], mean_error[1]))
log_file.flush()
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 22 13:56:09 2017
@author: Tim Lauster
"""
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from matplotlib import pyplot as plt
#from sklearn.model_selection import cross_val_score
import pandas as pd
import numpy as np
import time
import os
import re
import mglearn
from sklearn.tree import export_graphviz
############# Parameters
def randomForest(test_size_percent, use_cores, directory, numberOfTrees, treeDepth):
print("*** SINGLE FOREST ***")
# Some useful length measurements
imported_files = os.listdir(directory)
startTime = time.time()
# import data
data_frame_density = pd.DataFrame()
for ifile in range(0, len(imported_files)):
file = imported_files[ifile]
if file.endswith(".csv"):
# read in data
tmp_density = pd.read_csv(directory + file, header=None, sep=';')
data_frame_density = data_frame_density.append(tmp_density)
if np.mod(ifile, 10) == 0:
print("Reading files: ", round(ifile / len(imported_files) * 100), "%")
print("Size of data_frame_density:", data_frame_density.shape)
# split data in samples and response
y_density = data_frame_density[data_frame_density.columns[-3:]]
x_density = data_frame_density.iloc[:, 0:-3]
# round to 1%
#y_density = np.round(y_density*100)/100
# split data into training and test set of data
x_train_density, x_test_density, y_train_density, y_test_density = train_test_split(x_density, y_density,
random_state=1,
test_size=test_size_percent)
print('Data is split in test & training set [%.2f s]' % (time.time() - startTime))
print("\t Number of Trees ", numberOfTrees)
startTime =time.time()
rf_density_regressor = RandomForestRegressor(n_estimators=numberOfTrees, n_jobs=use_cores, max_depth=treeDepth,
oob_score=True)
x_train_density.to_csv("x_train_density.txt")
y_train_density.to_csv("y_train_density.txt")
# train model
rf_density_regressor.fit(x_train_density, y_train_density)
print('Fitting is done [%.2f s]' % (time.time() - startTime))
# print(np.shape(x_train_density))
# print(np.shape(y_train_density))
startTime =time.time()
# evaluate model
y_predicted = rf_density_regressor.predict(x_test_density)
print('Prediction is done [%.2f s]' % (time.time() - startTime))
# print(np.shape(x_test_density))
# print(np.shape(y_predicted))
fid1 = open('y_predicted.csv', 'wt')
y_predicted.tofile(fid1, sep=";")
fid1.close()
filename = 'y_test_density' + '.csv'
y_test_density.to_csv(path_or_buf=filename, sep=";", header=False, index=False)
    # normalize predictions so that the three target fractions sum to 1
row_sums = y_predicted.sum(axis=1)
y_predicted_normiert = y_predicted / row_sums[:, np.newaxis]
fid2 = open('y_predicted_normiert.csv', 'wt')
y_predicted_normiert.tofile(fid2, sep=";")
fid2.close()
# calc error of prediction
# print importance of features
# print("Importance of features")
# print(rf_density_regressor.feature_importances_)
# error calculation / writing into my "csv"
mean_euklid_error,mean_euklid_error_std,mean_euklid_error_max = euklidError(y_test_density, y_predicted_normiert)
mean_max_error, mean_max_error_std, mean_max_error_max = maxError(y_test_density,
y_predicted_normiert)
mean_euklid_error_percent = mean_euklid_error/np.sqrt(2)*100
euklid_error = np.sqrt(np.sum(np.square(y_predicted_normiert - np.array(y_test_density)), 1))
euklid_error_mean = np.mean(euklid_error)
euklid_error_mean_percent = euklid_error_mean / np.sqrt(2) * 100
print("**** RESULTS ****")
print("Mean Euklidean Error (Test data) - Students: %f " % mean_euklid_error)
print("Mean Euklidean Error (Test data) - Students: %f %%" % mean_euklid_error_percent)
print(" ")
print("Mean Euklidean Error (Test data): %f" % euklid_error_mean)
print("Mean Euklidean Error (Test data): %f %%" % euklid_error_mean_percent)
print(" ")
# From intro to machine learning book
# Returns the coefficient of determination R^2 of the prediction.
print("Accuracy on training set (Score): {:.3f}".format(
rf_density_regressor.score(x_train_density, y_train_density)))
print("Accuracy on test set (Score): {:.3f}".format(
rf_density_regressor.score(x_test_density, y_test_density)))
print("Out of bag error score %f" % rf_density_regressor.oob_score_)
###################### eval of the trees
n = int(np.sqrt(len(rf_density_regressor.feature_importances_)))
features_importance = np.reshape(rf_density_regressor.feature_importances_, [n, n])
# print(type(rf_density_regressor.feature_importances_))
# print(np.size(rf_density_regressor.feature_importances_))
# print(np.size(features_importance))
plt.figure()
plt.imshow(features_importance)
plt.title("Feature importance")
plt.colorbar()
plt.show()
# print("Out of bag error prediction")
# print(rf_density_regressor.oob_prediction_)
print(" ")
# print("Estimators")
# print(rf_density_regressor.estimators_)
## Visualize tree
# for tree_in_forest in rf_density_regressor.estimators_:
# export_graphviz(tree_in_forest,
# out_file="tree.dot")
# os.system('dot -Tpng tree.dot -o tree.png')
# print("visusalized tree.")
return None
############## Helper Functions for error calculation
def euklidError(y_data, y_predicted):
if len(y_data) != len(y_predicted):
print("dimensions dont match up")
return
y_error = np.zeros(len(y_data))
for i in range(0, len(y_data.index)):
tmpsum = 0
for j in range(0, len(y_data.columns)):
tmpsum += (y_data.values[i,j] - y_predicted[i,j])**2
y_error[i] = np.sqrt(tmpsum) # for each sample
fid1 = open('y_error.csv', 'wt')
y_error.tofile(fid1, sep=";")
fid1.close()
return np.mean(y_error), np.std(y_error), np.max(y_error)
def maxError(y_data, y_predicted):
if len(y_data) != len(y_predicted):
print("dimensions dont match up")
return None
y_error = np.zeros(len(y_data))
for i in range(0, len(y_data.index)):
tmpsum = np.zeros(shape = (3))
for j in range(0, len(y_data.columns)):
tmpsum[j] = (y_data.values[i,j] - y_predicted[i,j])
y_error[i] = np.max(tmpsum)
return np.mean(y_error), np.std(y_error), np.max(y_error)
if __name__ == '__main__':
test_size_percent = 0.2
use_cores = 4
directory = '../../data/output_preprocessed/'
    numberOfTrees = 10
    treeDepth = None  # grow the trees as deep as possible
    randomForest(test_size_percent, use_cores, directory, numberOfTrees, treeDepth)
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 22 13:56:09 2017
@author: Tim Lauster
"""
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import time
import os
from rf.error_calculation import calc_and_print_errors_rf
# Parameters
def randomForest(test_size_percent, use_cores, directory, numberOfTrees, treeDepth, nTargets, log_file, log_file_name, obs_area, resolution):
print("*** MULTIPLE FORESTS ***")
# Some useful length measurements
imported_files = os.listdir(directory)
startTime = time.time()
# import data
data_frame_density = pd.DataFrame()
for ifile in range(0, len(imported_files)):
file = imported_files[ifile]
if file.endswith(".csv"):
# read in data
tmp_density = pd.read_csv(directory + file, header=None, sep=';')
data_frame_density = data_frame_density.append(tmp_density)
if np.mod(ifile, 10) == 0:
print("Reading files: ", round(ifile / len(imported_files) * 100), "%")
print("Size of data_frame_density:", data_frame_density.shape)
# split data in samples and response
y_density = data_frame_density[data_frame_density.columns[-3:]]
x_density = data_frame_density.iloc[:, 0:-3]
# round to 1%
#y_density = np.round(y_density*100)/100
# split data into training and test set of data
x_train_density, x_test_density, y_train_density, y_test_density = train_test_split(x_density, y_density,
#random_state=1,
shuffle=True,
test_size=test_size_percent)
print('Data is split in test & training set [%.2f s]' % (time.time() - startTime))
#print('Size train: %d %d'% (x_train_density.shape(0),x_train_density.shape(1)))
#print('Size test: %d %d'% (x_test_density.shape(0),x_test_density.shape(1)))
print("\t Number of Trees ", numberOfTrees)
n_test = y_test_density.shape
    y_test_density_np = y_test_density.values  # plain numpy array instead of the private _get_values accessor
y_predicted = np.zeros([n_test[0],n_test[1]]) # empty numpy ndarray
score_test = np.zeros(nTargets)
score_training = np.zeros(nTargets)
score_oob = np.zeros(nTargets)
for iTarget in range(0, nTargets):
        t1 = time.perf_counter()
startTime =time.time()
rf_density_regressor= RandomForestRegressor(n_estimators=numberOfTrees, n_jobs=use_cores, max_depth=treeDepth,
oob_score=True)
# train model
rf_density_regressor.fit(x_train_density, y_train_density.iloc[:,iTarget])
print('Fitting is done [%.2f s]' % (time.time() - startTime))
        dt_rf_training = time.perf_counter() - t1  # time.clock() was removed in Python 3.8
startTime =time.time()
# evaluate model
y_predicted_tmp = rf_density_regressor.predict(x_test_density)
y_predicted[:,iTarget] = y_predicted_tmp
#np.savetxt("y_predicted_tmp_{0}.csv".format(iTarget), y_predicted_tmp, delimiter=",")
print('Prediction is done [%.2f s]' % (time.time() - startTime))
# Evaluate performance
score_training[iTarget] = rf_density_regressor.score(x_train_density, y_train_density.iloc[:, iTarget])
score_test[iTarget] = rf_density_regressor.score(x_test_density, y_test_density.iloc[:, iTarget])
score_oob[iTarget] = rf_density_regressor.oob_score_
# np.savetxt("y_predicted.csv", y_predicted, delimiter=",")
    # normalize predictions so that the three target fractions sum to 1
row_sums = y_predicted.sum(axis=1)
y_predicted_normiert = y_predicted / row_sums[:, np.newaxis]
combined = np.zeros((len(y_predicted_normiert), 6))
combined[:, :3] = y_predicted_normiert
combined[:, 3:] = y_test_density_np
np.savetxt("y_predicted_norm.csv", combined, delimiter=",")
log_file.write("\n * RANDOM FOREST RESULTS * \n")
log_file.write("Number of heatmaps: %d\n" % data_frame_density.__len__())
log_file.write("Size of heatmaps: [%d, %d]\n\n" % (data_frame_density.shape[0], data_frame_density.shape[1]))
print(" ")
print("**** RANDOM FOREST RESULTS ****")
calc_and_print_errors_rf(y_test_density_np, y_predicted_normiert, log_file, score_training, score_test, score_oob)
###################### eval of the trees
n = int(np.sqrt(len(rf_density_regressor.feature_importances_)))
features_importance = np.reshape(rf_density_regressor.feature_importances_, [int(obs_area[3]/resolution),int(obs_area[2]/resolution)])
# print(type(rf_density_regressor.feature_importances_))
# print(np.size(rf_density_regressor.feature_importances_))
# print(np.size(features_importance))
plt.figure()
plt.imshow(features_importance)
plt.title("Feature importance")
plt.colorbar()
# plt.show()
plt.savefig(log_file_name[0:len(log_file_name)-3] + 'pdf')
# print("Out of bag error prediction")
# print(rf_density_regressor.oob_prediction_)
print(" ")
# print("Estimators")
# print(rf_density_regressor.estimators_)
## Visualize tree
# for tree_in_forest in rf_density_regressor.estimators_:
# export_graphviz(tree_in_forest,
# out_file="tree.dot")
# os.system('dot -Tpng tree.dot -o tree.png')
# print("visusalized tree.")
return dt_rf_training
if __name__ == '__main__':
test_size_percent = 0.2
use_cores = 4
directory = '../../data/output_preprocessed/'
numberOfTrees = [10]
treeDepths = [None] # as long as possible
randomForest(test_size_percent, use_cores, directory, numberOfTrees, treeDepths)
import main_rf
# main.run_main(obs_area = [20, 10, 10, 10], resolution = 0.5)
# main.run_main(obs_area = [20, 15, 10, 10])
# main.run_main(obs_area = [20, 20, 10, 10])
# main.run_main(obs_area = [20, 25, 10, 10])
# main.run_main(obs_area = [20, 10, 10, 2.5])
# main.run_main(obs_area = [20, 10, 10, 5])
# main.run_main(obs_area = [20, 10, 10, 7.5])
# main.run_main(obs_area = [20, 10, 10, 10])
# main.run_main(obs_area = [20, 10, 10, 15])
## Camera cutout size
# for i in range(0,4):
# main.run_main(obs_area = [20,10,10,2.5] , resolution=0.5, framerate = 10, numberOfTrees=20)
# main.run_main(obs_area = [20,10,10,5] , resolution=0.5, framerate = 10, numberOfTrees=20)
# main.run_main(obs_area = [20,10,10,7.5] , resolution=0.5, framerate = 10, numberOfTrees=20)
# main.run_main(obs_area = [20,10,10,10] , resolution=0.5, framerate = 10, numberOfTrees=20)
# main.run_main(obs_area = [20,10,10,12.5], resolution=0.5, framerate = 10, numberOfTrees=20)
# main.run_main(obs_area = [20,10,10,15] , resolution=0.5, framerate = 10, numberOfTrees=20)
#main.run_main(obs_area = [20,10,10,17.5] , resolution=0.5, framerate = 10, numberOfTrees=20)
#main.run_main(obs_area = [20,10,10,20] , resolution=0.5, framerate = 10, numberOfTrees=20)
## Camera position
'''
for i in range(0,4):
main_rf.run_main(observation_area= [20, 10, 10, 10], resolution=0.5, framerate = 10, numberOfTrees=20)
main_rf.run_main(observation_area= [20, 15, 10, 10], resolution=0.5, framerate = 10, numberOfTrees=20)