@@ -116,7 +116,7 @@ def _convert_name(filenames, shuffle=False):
116
116
return base + ".hdf5"
117
117
118
118
119
- def open (path , convert = False , shuffle = False , * args , ** kwargs ):
119
+ def open (path , convert = False , shuffle = False , copy_index = True , * args , ** kwargs ):
120
120
"""Open a dataset from file given by path
121
121
122
122
Example:
@@ -129,6 +129,7 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
129
129
:param bool shuffle: shuffle converted dataset or not
130
130
:param args: extra arguments for file readers that need it
131
131
:param kwargs: extra keyword arguments
132
+ :param bool copy_index: copy index when source is read via pandas
132
133
:return: return dataset if file is supported, otherwise None
133
134
:rtype: Dataset
134
135
@@ -167,15 +168,18 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
167
168
path = filenames [0 ]
168
169
ext = os .path .splitext (path )[1 ]
169
170
if os .path .exists (filename_hdf5 ) and convert : # also check mtime?
170
- ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
171
+ if convert :
172
+ ds = vaex .file .open (filename_hdf5 )
173
+ else :
174
+ ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
171
175
else :
172
176
if ext == '.csv' : # special support for csv.. should probably approach it a different way
173
- ds = from_csv (path , ** kwargs )
177
+ ds = from_csv (path , copy_index = copy_index , ** kwargs )
174
178
else :
175
179
ds = vaex .file .open (path , * args , ** kwargs )
176
180
if convert :
177
181
ds .export_hdf5 (filename_hdf5 , shuffle = shuffle )
178
- ds = vaex .file .open (filename_hdf5 , * args , ** kwargs )
182
+ ds = vaex .file .open (filename_hdf5 ) # arguments were meant for pandas?
179
183
if ds is None :
180
184
if os .path .exists (path ):
181
185
raise IOError ('Could not open file: {}, did you install vaex-hdf5?' .format (path ))
@@ -348,10 +352,10 @@ def from_ascii(path, seperator=None, names=True, skip_lines=0, skip_after=0, **k
348
352
return ds
349
353
350
354
351
def from_csv(filename_or_buffer, copy_index=True, **kwargs):
    """Shortcut to read a csv file using pandas and convert to a dataset directly.

    :param filename_or_buffer: passed as the first argument to ``pandas.read_csv``
    :param bool copy_index: forwarded unchanged to ``from_pandas``
    :param kwargs: extra keyword arguments forwarded to ``pandas.read_csv``
    :return: whatever ``from_pandas`` returns for the parsed DataFrame
    """
    # Imported lazily so pandas is only required when a csv is actually read.
    import pandas as pd

    df = pd.read_csv(filename_or_buffer, **kwargs)
    # copy_index is consumed here and never reaches pandas.read_csv.
    return from_pandas(df, copy_index=copy_index)
355
359
356
360
357
361
def read_csv (filepath_or_buffer , ** kwargs ):
0 commit comments