Skip to content

Commit 0b2c3f4

Browse files
new: option to skip copying the index when reading via pandas, and fix to not pass extra arguments when opening the converted file (convert is true)
1 parent 072ab9b commit 0b2c3f4

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

packages/vaex-core/vaex/__init__.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def _convert_name(filenames, shuffle=False):
116116
return base + ".hdf5"
117117

118118

119-
def open(path, convert=False, shuffle=False, *args, **kwargs):
119+
def open(path, convert=False, shuffle=False, copy_index=True, *args, **kwargs):
120120
"""Open a dataset from file given by path
121121
122122
Example:
@@ -129,6 +129,7 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
129129
:param bool shuffle: shuffle converted dataset or not
130130
:param args: extra arguments for file readers that need it
131131
:param kwargs: extra keyword arguments
132+
:param bool copy_index: copy index when source is read via pandas
132133
:return: return dataset if file is supported, otherwise None
133134
:rtype: Dataset
134135
@@ -167,15 +168,18 @@ def open(path, convert=False, shuffle=False, *args, **kwargs):
167168
path = filenames[0]
168169
ext = os.path.splitext(path)[1]
169170
if os.path.exists(filename_hdf5) and convert: # also check mtime?
170-
ds = vaex.file.open(filename_hdf5, *args, **kwargs)
171+
if convert:
172+
ds = vaex.file.open(filename_hdf5)
173+
else:
174+
ds = vaex.file.open(filename_hdf5, *args, **kwargs)
171175
else:
172176
if ext == '.csv': # special support for csv.. should probably approach it a different way
173-
ds = from_csv(path, **kwargs)
177+
ds = from_csv(path, copy_index=copy_index, **kwargs)
174178
else:
175179
ds = vaex.file.open(path, *args, **kwargs)
176180
if convert:
177181
ds.export_hdf5(filename_hdf5, shuffle=shuffle)
178-
ds = vaex.file.open(filename_hdf5, *args, **kwargs)
182+
ds = vaex.file.open(filename_hdf5) # arguments were meant for pandas?
179183
if ds is None:
180184
if os.path.exists(path):
181185
raise IOError('Could not open file: {}, did you install vaex-hdf5?'.format(path))
@@ -348,10 +352,10 @@ def from_ascii(path, seperator=None, names=True, skip_lines=0, skip_after=0, **k
348352
return ds
349353

350354

351-
def from_csv(filename_or_buffer, **kwargs):
355+
def from_csv(filename_or_buffer, copy_index=True, **kwargs):
352356
"""Shortcut to read a csv file using pandas and convert to a dataset directly"""
353357
import pandas as pd
354-
return from_pandas(pd.read_csv(filename_or_buffer, **kwargs))
358+
return from_pandas(pd.read_csv(filename_or_buffer, **kwargs), copy_index=copy_index)
355359

356360

357361
def read_csv(filepath_or_buffer, **kwargs):

0 commit comments

Comments
 (0)