Skip to content

Commit

Permalink
week4 assignment
Browse files Browse the repository at this point in the history
  • Loading branch information
jogging520 committed May 22, 2019
1 parent cc00788 commit 9443391
Show file tree
Hide file tree
Showing 52 changed files with 163,139 additions and 769 deletions.
16,282 changes: 16,282 additions & 0 deletions 李宏毅机器学习-作业/week4/01-Data/X_test_my.csv

Large diffs are not rendered by default.

32,562 changes: 32,562 additions & 0 deletions 李宏毅机器学习-作业/week4/01-Data/X_train_my.csv

Large diffs are not rendered by default.

32,562 changes: 32,562 additions & 0 deletions 李宏毅机器学习-作业/week4/01-Data/Y_train_my.csv

Large diffs are not rendered by default.

16,282 changes: 16,282 additions & 0 deletions 李宏毅机器学习-作业/week4/01-Data/test.csv

Large diffs are not rendered by default.

32,562 changes: 32,562 additions & 0 deletions 李宏毅机器学习-作业/week4/01-Data/train.csv

Large diffs are not rendered by default.

Binary file removed 李宏毅机器学习-作业/week4/02-Output/Cnn.png
Binary file not shown.
Binary file not shown.
Binary file removed 李宏毅机器学习-作业/week4/02-Output/Dnn.png
Binary file not shown.
16,282 changes: 16,282 additions & 0 deletions 李宏毅机器学习-作业/week4/02-Output/Predict

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
16,282 changes: 16,282 additions & 0 deletions 李宏毅机器学习-作业/week4/02-Output/correct_answer.csv

Large diffs are not rendered by default.

Diff not rendered.
Diff not rendered.
Diff not rendered.
92 changes: 0 additions & 92 deletions 李宏毅机器学习-作业/week4/Base.py

This file was deleted.

64 changes: 33 additions & 31 deletions 李宏毅机器学习-作业/week4/DataProcessing.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,47 @@
import os, csv
import csv, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from Plot import plotDigits

def makeDataProcessing(dfData):
dfDataX = dfData.drop(["education_num", "sex"], axis=1)

def makeDataProcessing(Data):
listLabel = []
listImageVector = []
listImage = []
for index, strRow in enumerate(Data):
strLabel, strImageVector = strRow.split(",")
if index != 0:
arrayLabel = int(strLabel)
arrayImageVector = np.fromstring(strImageVector, dtype=int, sep=" ") # for dnn
arrayImage = arrayImageVector.reshape(48, 48, 1) # for cnn
listObjectColumnName = [col for col in dfDataX.columns if dfDataX[col].dtypes=="object"]
listNonObjectColumnName = [col for col in dfDataX.columns if dfDataX[col].dtypes!="object"]

listLabel.append(arrayLabel)
listImageVector.append(arrayImageVector)
listImage.append(arrayImage)
return listLabel, listImageVector, listImage
dfNonObjectData = dfDataX[listNonObjectColumnName]
dfNonObjectData.insert(2, "sex", (dfData["sex"]==" Male").astype(np.int)) # Male 1, Female 0

dfObjectData = dfDataX[listObjectColumnName]
dfObjectData = pd.get_dummies(dfObjectData)

if __name__ == "__main__":

strProjectFolder = os.path.dirname(__file__)
strOutputPath = "02-Output/"
dfDataX = dfNonObjectData.join(dfObjectData)
dfDataX = dfDataX.astype("int64")
return dfDataX

DataTrain = open(os.path.join(strProjectFolder, "01-Data/train.csv"), "r")
DataTest = open(os.path.join(strProjectFolder, "01-Data/test.csv"), "r")
if __name__ == "__main__":

listTrainLabel, listTrainImageVector, listTrainImage = makeDataProcessing(DataTrain)
np.savez(os.path.join(strProjectFolder, "01-Data/Train.npz"), Label=np.asarray(listTrainLabel), Image=np.asarray(listTrainImage))
# read raw data
dfDataTrain = pd.read_csv(os.path.join(os.path.dirname(__file__), "train.csv"))
dfDataTest = pd.read_csv(os.path.join(os.path.dirname(__file__), "test.csv"))

_, listTestImageVector, listTestImage = makeDataProcessing(DataTest)
np.savez(os.path.join(strProjectFolder, "01-Data/Test.npz"), Image=np.asarray(listTestImage))
# record the training and testing set sizes (used below to split the combined data)
intTrainSize = len(dfDataTrain)
intTestSize = len(dfDataTest)

listShowId = [0, 299, 2, 7, 3, 15, 4]
listShowImage = [listTrainImage[i] for i in listShowId]
listLabelX = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
plotDigits(instances=listShowImage, intImagesPerRow=7, listLabelX=listLabelX, strProjectFolder=strProjectFolder, strOutputPath=strOutputPath)
# processing Training Label (Y)
dfDataTrainY = dfDataTrain["income"]
dfTrainY = pd.DataFrame((dfDataTrainY==" >50K").astype("int64"), columns=["income"]) # >50K 1, =<50K 0

# processing Training and Testing data (X)
dfDataTrain = dfDataTrain.drop(["income"], axis=1)
dfAllData = pd.concat([dfDataTrain, dfDataTest], axis=0, ignore_index=True)
dfAllData = makeDataProcessing(dfData=dfAllData)

# separate the combined data back into training and testing sets
dfTrainX = dfAllData[0:intTrainSize]
dfTestX = dfAllData[intTrainSize:(intTrainSize + intTestSize)]

# save Training data, Testing data and Training label
dfTrainX.to_csv(os.path.join(os.path.dirname(__file__), "X_train_my.csv"), index=False)
dfTestX.to_csv(os.path.join(os.path.dirname(__file__), "X_Test_my.csv"), index=False)
dfTrainY.to_csv(os.path.join(os.path.dirname(__file__), "Y_train_my.csv"), index=False)
84 changes: 0 additions & 84 deletions 李宏毅机器学习-作业/week4/Model.py

This file was deleted.

Loading

0 comments on commit 9443391

Please sign in to comment.