|
1 | 1 | from io import BytesIO |
2 | 2 |
|
3 | 3 | import numpy as np |
| 4 | +from odf.opendocument import OpenDocumentSpreadsheet |
| 5 | +from odf.table import Table, TableCell, TableRow |
| 6 | +from odf.text import P |
4 | 7 |
|
5 | 8 | from pandas import DataFrame, ExcelWriter, date_range, read_excel |
6 | 9 | import pandas.util.testing as tm |
7 | 10 |
|
8 | 11 |
|
9 | | -class Excel: |
| 12 | +def _generate_dataframe(): |
| 13 | + N = 2000 |
| 14 | + C = 5 |
| 15 | + df = DataFrame( |
| 16 | + np.random.randn(N, C), |
| 17 | + columns=["float{}".format(i) for i in range(C)], |
| 18 | + index=date_range("20000101", periods=N, freq="H"), |
| 19 | + ) |
| 20 | + df["object"] = tm.makeStringIndex(N) |
| 21 | + return df |
| 22 | + |
| 23 | + |
| 24 | +class WriteExcel: |
10 | 25 |
|
11 | 26 | params = ["openpyxl", "xlsxwriter", "xlwt"] |
12 | 27 | param_names = ["engine"] |
13 | 28 |
|
14 | 29 | def setup(self, engine): |
15 | | - N = 2000 |
16 | | - C = 5 |
17 | | - self.df = DataFrame( |
18 | | - np.random.randn(N, C), |
19 | | - columns=["float{}".format(i) for i in range(C)], |
20 | | - index=date_range("20000101", periods=N, freq="H"), |
21 | | - ) |
22 | | - self.df["object"] = tm.makeStringIndex(N) |
23 | | - self.bio_read = BytesIO() |
24 | | - self.writer_read = ExcelWriter(self.bio_read, engine=engine) |
25 | | - self.df.to_excel(self.writer_read, sheet_name="Sheet1") |
26 | | - self.writer_read.save() |
27 | | - self.bio_read.seek(0) |
28 | | - |
29 | | - def time_read_excel(self, engine): |
30 | | - read_excel(self.bio_read) |
| 30 | + self.df = _generate_dataframe() |
31 | 31 |
|
32 | 32 | def time_write_excel(self, engine): |
33 | | - bio_write = BytesIO() |
34 | | - bio_write.seek(0) |
35 | | - writer_write = ExcelWriter(bio_write, engine=engine) |
36 | | - self.df.to_excel(writer_write, sheet_name="Sheet1") |
37 | | - writer_write.save() |
| 33 | + bio = BytesIO() |
| 34 | + bio.seek(0) |
| 35 | + writer = ExcelWriter(bio, engine=engine) |
| 36 | + self.df.to_excel(writer, sheet_name="Sheet1") |
| 37 | + writer.save() |
| 38 | + |
| 39 | + |
| 40 | +class ReadExcel: |
| 41 | + |
| 42 | + params = ["xlrd", "openpyxl", "odf"] |
| 43 | + param_names = ["engine"] |
| 44 | + fname_excel = "spreadsheet.xlsx" |
| 45 | + fname_odf = "spreadsheet.ods" |
| 46 | + |
| 47 | + def _create_odf(self): |
| 48 | + doc = OpenDocumentSpreadsheet() |
| 49 | + table = Table(name="Table1") |
| 50 | + for row in self.df.values: |
| 51 | + tr = TableRow() |
| 52 | + for val in row: |
| 53 | + tc = TableCell(valuetype="string") |
| 54 | + tc.addElement(P(text=val)) |
| 55 | + tr.addElement(tc) |
| 56 | + table.addElement(tr) |
| 57 | + |
| 58 | + doc.spreadsheet.addElement(table) |
| 59 | + doc.save(self.fname_odf) |
| 60 | + |
| 61 | + def setup_cache(self): |
| 62 | + self.df = _generate_dataframe() |
| 63 | + |
| 64 | + self.df.to_excel(self.fname_excel, sheet_name="Sheet1") |
| 65 | + self._create_odf() |
| 66 | + |
| 67 | + def time_read_excel(self, engine): |
| 68 | + fname = self.fname_odf if engine == "odf" else self.fname_excel |
| 69 | + read_excel(fname, engine=engine) |
38 | 70 |
|
39 | 71 |
|
40 | 72 | from ..pandas_vb_common import setup # noqa: F401 isort:skip |
0 commit comments