-
Notifications
You must be signed in to change notification settings - Fork 0
/
combiner.py
94 lines (79 loc) · 2.83 KB
/
combiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#make better notes
#add help notes
#future - add function for worksheet
def addTag(file,path,r,s):
import pandas as pd
import os
tempFrame = pd.read_excel(os.path.join(path,file),skiprows=r,sheetname=s)
tempFrame['tag'] = str(file)
tempFrame['sheet'] = str(s)
return tempFrame
def excel_mix(pathVar,startrow,sheet): #function for excel workbooks
import pandas as pd
import os
#variables to take from user input
#pathVar = r'' #path to file(s)
#extVar = '.xls'#update to all crosstab variations
#startrow = 10 #what row to start collecting data, 0 is default
#sheet = 'Compensation Data' #0 = default
resultFile = 'result.xlsx' #name of file to be created
#build list of files to combine
print('Gathering files...')
fileList = []
for filename in os.listdir(pathVar):
if filename.lower().endswith('.xlsx') or filename.lower().endswith('.xls'):
fileList.append(filename)
print('%s added to list' %(filename))
option = 'excel'
elif filename.lower().endswith('.csv'):
fileList.append(filename)
print('%s added to list' %(filename))
option ='csv'
#print error if result.xlsx already exists in fileList
#create data frame for each file
#combine dataframes into one
start = startrow -1
if option == 'excel':
frames = [ addTag(f,pathVar,start,sheet) for f in fileList ]
elif option == 'csv': #give startrow option for csv
frames = [ pd.read_csv(os.path.join(pathVar,f),encoding='cp1252') for f in fileList ]
else:
print('Invalid file type!')
number = len(fileList)
print('Combining %s files, please be patient :)' %(number))
result = pd.concat(frames)
del frames
#print combined dataframes into one exceel sheet
result.to_excel(os.path.join(pathVar,resultFile),sheet_name='ALLINFO',index=False)
del result
print('Done!')
def sheet_list (path,file): #gathers list of sheets from file
from openpyxl import load_workbook
import os
#load file
wb = load_workbook(os.path.join(path,file))
#get list of sheet names
sheet_list = wb.get_sheet_names()
#return list of sheets
return sheet_list
def sheet_mix(pathVar,file,startrow,sheetList): #function for excel sheets
import pandas as pd
import os
#variables to take from user input
#pathVar = r'' #path to file(s)
#extVar = '.xls'#update to all crosstab variations
#startrow = 10 #what row to start collecting data, 0 is default
#sheet = 'Compensation Data' #0 = default
resultFile = 'result.xlsx' #name of file to be created
#create data frame for each file
#combine dataframes into one
start = startrow -1
frames = [ addTag(file,pathVar,start,sheet) for sheet in sheetList ]
number = len(sheetList)
print('Combining %s files, please be patient :)' %(number))
result = pd.concat(frames)
del frames
#print combined dataframes into one exceel sheet
result.to_excel(os.path.join(pathVar,resultFile),sheet_name='ALLINFO',index=False)
del result
print('Done!')