import os
import pickle
import random

import numpy as np


# TODO: (improve) Add functionality to retrieve data only from specific datasets
class DataLoader():

    def __init__(self, batch_size=50, seq_length=5, datasets=[0, 1], forcePreProcess=False):
        '''
        Initialiser function for the DataLoader class
        params:
        batch_size : Size of the mini-batch
        seq_length : RNN sequence length
        datasets : Indices (into self.data_dirs) of the datasets to use
        forcePreProcess : Re-run preprocessing even if a pickle file already exists
        '''
        # List of data directories where raw data resides
        # self.data_dirs = ['./data/eth/univ', './data/eth/hotel',
        #                   './data/ucy/zara/zara01', './data/ucy/zara/zara02',
        #                   './data/ucy/univ']
        # NOTE: only two directories are currently active, so the only valid
        # dataset indices are 0 and 1 (hence the default datasets=[0, 1])
        self.data_dirs = ['./data/eth/univ', './data/eth/hotel']
        self.used_data_dirs = [self.data_dirs[x] for x in datasets]

        # Data directory where the pre-processed pickle file resides
        self.data_dir = './data'

        # Store the batch size and the sequence length arguments
        self.batch_size = batch_size
        self.seq_length = seq_length

        # Define the path of the file in which the data needs to be stored
        data_file = os.path.join(self.data_dir, "trajectories.cpkl")

        # If the file doesn't exist already, or if forcePreProcess is true
        if not os.path.exists(data_file) or forcePreProcess:
            print("Creating pre-processed data from raw data")
            # Preprocess the data from the csv files
            self.preprocess(self.used_data_dirs, data_file)

        # Load the data from the pickled file
        self.load_preprocessed(data_file)
        # Reset all the pointers
        self.reset_batch_pointer()
    def preprocess(self, data_dirs, data_file):
        '''
        The function that pre-processes the pixel_pos.csv file of each dataset
        into data that can be used
        params:
        data_dirs : List of directories where raw data resides
        data_file : The file into which all the pre-processed data needs to be stored
        '''
        # all_ped_data is a dictionary mapping each pedestrian to their
        # trajectory, given by a 3 x numPoints matrix with each column
        # in the order x, y, frameId
        # Pedestrians from all datasets are combined
        # Dataset pedestrian indices are stored in dataset_indices
        all_ped_data = {}
        dataset_indices = []
        current_ped = 0

        # For each dataset
        for directory in data_dirs:
            # Define the path to its respective csv file
            file_path = os.path.join(directory, 'pixel_pos.csv')

            # Load data from the csv file
            # Data is a 4 x numTrajPoints matrix
            # where each column is a (frameId, pedId, y, x) vector
            data = np.genfromtxt(file_path, delimiter=',')

            # Get the number of pedestrians in the current dataset
            # NOTE: this assumes pedestrian IDs are contiguous integers 1..numPeds
            numPeds = np.size(np.unique(data[1, :]))

            # For each pedestrian in the dataset
            for ped in range(1, numPeds+1):
                # Extract the trajectory of the current ped
                traj = data[:, data[1, :] == ped]
                # Format it as (x, y, frameId)
                traj = traj[[3, 2, 0], :]
                # Store it in the dictionary
                all_ped_data[current_ped + ped] = traj

            # Current dataset done
            dataset_indices.append(current_ped + numPeds)
            current_ped += numPeds

        # The complete data is a tuple of all pedestrian data and dataset ped indices
        complete_data = (all_ped_data, dataset_indices)
        # Store the complete data into the pickle file
        with open(data_file, "wb") as f:
            pickle.dump(complete_data, f, protocol=2)
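
    # Illustrative sketch of the pickled structure written above (the values
    # are made up, not from a real run): for two datasets with 3 and 2
    # pedestrians respectively,
    #
    #   all_ped_data = {1: np.array([[x0, x1, ...],    # x
    #                                [y0, y1, ...],    # y
    #                                [f0, f1, ...]]),  # frameId
    #                   2: ..., 3: ..., 4: ..., 5: ...}
    #   dataset_indices = [3, 5]   # cumulative pedestrian counts per dataset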
    def load_preprocessed(self, data_file):
        '''
        Function to load the pre-processed data into the DataLoader object
        params:
        data_file : The path to the pickled data file
        '''
        # Load data from the pickled file
        with open(data_file, "rb") as f:
            self.raw_data = pickle.load(f)

        # Get the pedestrian data from the pickle file
        all_ped_data = self.raw_data[0]
        # Not using dataset_indices for now
        # dataset_indices = self.raw_data[1]

        # Construct the data from sequences (or trajectories) longer than seq_length
        self.data = []
        counter = 0

        # For each pedestrian in the data
        for ped in all_ped_data:
            # Extract their trajectory
            traj = all_ped_data[ped]
            # Keep it only if it is longer than seq_length + 2
            # (+2 because we need both source and target sub-sequences)
            if traj.shape[1] > (self.seq_length+2):
                # TODO: (Improve) Store only the (x, y) coordinates for now
                self.data.append(traj[[0, 1], :].T)
                # Number of batches this datapoint is worth
                counter += int(traj.shape[1] / (self.seq_length+2))

        # Calculate the number of batches (each of batch_size) in the data
        self.num_batches = int(counter / self.batch_size)
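
    # Worked example (illustrative numbers, not from a real run): with
    # seq_length=5, a trajectory of 20 points passes the length check
    # (20 > 7) and adds int(20 / 7) = 2 to counter; if counter sums to 400
    # over all pedestrians and batch_size=50, then num_batches = int(400 / 50) = 8.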
    def next_batch(self):
        '''
        Function to get the next batch of points
        '''
        # Lists of source and target data for the current batch
        x_batch = []
        y_batch = []

        # For each sequence in the batch
        for i in range(self.batch_size):
            # Extract the trajectory of the pedestrian pointed to by self.pointer
            traj = self.data[self.pointer]
            # Number of sequences that fit in this trajectory
            n_batch = int(traj.shape[0] / (self.seq_length+2))
            # Randomly sample an index from which the sub-trajectory is taken
            idx = random.randint(0, traj.shape[0] - self.seq_length - 2)
            # Append seq_length points starting at idx as the source, and the
            # same window shifted by one frame as the target
            x_batch.append(np.copy(traj[idx:idx+self.seq_length, :]))
            y_batch.append(np.copy(traj[idx+1:idx+self.seq_length+1, :]))

            # Advance the pointer with probability 1/n_batch, so that longer
            # trajectories (larger n_batch) are sampled proportionally more often
            if random.random() < (1.0/float(n_batch)):
                self.tick_batch_pointer()

        return x_batch, y_batch
    def tick_batch_pointer(self):
        '''
        Advance the data pointer
        '''
        self.pointer += 1
        if self.pointer >= len(self.data):
            self.pointer = 0
    def reset_batch_pointer(self):
        '''
        Reset the data pointer
        '''
        self.pointer = 0
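

# A minimal usage sketch (assumes the pixel_pos.csv files exist under the
# active data directories; the batch and sequence sizes are illustrative):
if __name__ == '__main__':
    loader = DataLoader(batch_size=8, seq_length=5, datasets=[0, 1])
    for b in range(loader.num_batches):
        x_batch, y_batch = loader.next_batch()
        # Each element is a (seq_length, 2) array of (x, y) positions;
        # each y sequence is its x sequence shifted one frame ahead.
        print(b, x_batch[0].shape, y_batch[0].shape)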