[UPDATE] Fix some script errors #91

Open
wants to merge 4 commits into base: java
8 changes: 2 additions & 6 deletions Agent/workspace/hyperopt/abalone/code/code.py
@@ -105,19 +105,15 @@

# List of models to evaluate
catboost_model = CatBoostRegressor(random_state=1, verbose=False)
# lgbm_model = LGBMRegressor(verbose=-1, random_state=1)
# xgb_model = XGBRegressor(verbose=0, random_state=1, enable_categorical=True)


# # Fit the models on the training data
# catboost_model.fit(X_train, y_train)
# lgbm_model.fit(X_train, y_train)
# xgb_model.fit(X_train, y_train)


# # Evaluate the models
# catboost_preds = catboost_model.predict(X_val)
# lgbm_preds = lgbm_model.predict(X_val)
# xgb_preds = xgb_model.predict(X_val)


# final_preds = np.round((catboost_preds + lgbm_preds + xgb_preds) / 3).astype("int")

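For context, CatBoost is the only estimator left active after this cleanup. A minimal sketch of the resulting single-model path, assuming X_train, y_train, and X_val are defined earlier in code.py as in the rest of the script:

import numpy as np
from catboost import CatBoostRegressor

# Single active model; the LGBM/XGB branches remain commented out.
catboost_model = CatBoostRegressor(random_state=1, verbose=False)
catboost_model.fit(X_train, y_train)
catboost_preds = catboost_model.predict(X_val)
final_preds = np.round(catboost_preds).astype("int")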
13 changes: 7 additions & 6 deletions Agent/workspace/hyperopt/abalone2/code/code.py
@@ -33,7 +33,7 @@

np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
FILE_PATH = "./workspace/hyperopt/abalone/data/"
FILE_PATH = "./workspace/hyperopt/abalone2/data/"
# FILE_PATH="../data/"
submmision_file = "submission.csv"
train = pd.read_csv(FILE_PATH + "train.csv")
@@ -317,14 +317,15 @@ def log_transformation(data, columns):
"xgboost_weight": 0.48550637896530635,
"catboost_weight": 4.189724537494019,
}

voting_regressor = VotingRegressor(
estimators=cv_estimators,
weights=[
weights_list=[
weight_best_params["lgbm_weight"],
weight_best_params["xgboost_weight"],
weight_best_params["catboost_weight"]
]

]
voting_regressor = VotingRegressor(
estimators=cv_estimators,
weights=weights_list
)

# cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_SEED)
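The fix names the weights list before constructing the ensemble, replacing the earlier malformed call. A minimal sketch of the corrected pattern, assuming cv_estimators is a list of (name, estimator) pairs and weight_best_params is the tuned dict above:

from sklearn.ensemble import VotingRegressor

weights_list = [
    weight_best_params["lgbm_weight"],
    weight_best_params["xgboost_weight"],
    weight_best_params["catboost_weight"],
]
voting_regressor = VotingRegressor(estimators=cv_estimators, weights=weights_list)
# voting_regressor.fit(X_train, y_train)
# preds = voting_regressor.predict(X_test)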
8 changes: 4 additions & 4 deletions Agent/workspace/hyperopt/bank-churn2/code/code.py
@@ -110,8 +110,8 @@ def plot_kde_for_all_columns(df):
# Below are the parameters for xgboost.

xgb_params = {"booster": "gbtree",
"lambda": 0.8611971458776956,
"alpha": 3.3684132992886347e-07,
"reg_lambda": 0.8611971458776956,
"reg_alpha": 3.3684132992886347e-07,
"max_depth": 3,
"eta": 0.17374299923922656,
"gamma": 1.2505690952357777e-06,
@@ -144,9 +144,9 @@ def plot_kde_for_all_columns(df):


# ## Voting Ensemble

weight_list=[0.2,0.4,0.4]
voter = VotingClassifier(estimators=[("m1", xgb_model), ("m2", lgbm_model), ("m3", cb_model)], voting="soft",
weights=[0.2, 0.4, 0.4])
weights=weight_list)
# voter.fit(X,y)


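Both fixes target the scikit-learn-style APIs: the XGBoost sklearn wrapper takes reg_lambda/reg_alpha (bare "lambda"/"alpha" are native-API parameter names), and the voting weights are now bound to a name before use. A hedged sketch of the corrected usage; lgbm_model and cb_model stand in for the LightGBM and CatBoost classifiers defined elsewhere in the script:

from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# sklearn-wrapper names for the L2/L1 penalties.
xgb_model = XGBClassifier(
    booster="gbtree",
    reg_lambda=0.8611971458776956,
    reg_alpha=3.3684132992886347e-07,
    max_depth=3,
)

weight_list = [0.2, 0.4, 0.4]
voter = VotingClassifier(
    estimators=[("m1", xgb_model), ("m2", lgbm_model), ("m3", cb_model)],
    voting="soft",
    weights=weight_list,
)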
219 changes: 219 additions & 0 deletions Agent/workspace/hyperopt/digit-recognizer/code/code.py
@@ -0,0 +1,219 @@
#!/usr/bin/env python
# coding: utf-8

# get_ipython().system('pip install keras-tuner')


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# from kerastuner import RandomSearch

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# from keras.callbacks import ReduceLROnPlateau
# from keras.optimizers import RMSprop
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# from keras.datasets import mnist  # redundant: mnist is already imported from tensorflow.keras above
FILE_PATH = "./workspace/hyperopt/digit-recognizer/data/"

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
# for filename in filenames:
# print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.


# Load the data
train = pd.read_csv(FILE_PATH+'train.csv')
labels = train.iloc[:,0].values.astype('int32')

X_train = (train.iloc[:,1:].values).astype('float32')
X_test = (pd.read_csv(FILE_PATH+'test.csv').values).astype('float32')

#reshape into images
X_train = X_train.reshape(-1,28,28,1)
X_test = X_test.reshape(-1,28,28,1)

# one hot encoding
y_train = tf.keras.utils.to_categorical(labels)

# print("Check data")
# print(labels)
# print(X_train[0].shape)
# print(y_train)


# Load Data from Keras MNIST
(train_imagesRaw, train_labelsRaw), (test_imagesRaw, test_labelsRaw) = mnist.load_data()


#reshape into images
X_train_keras = train_imagesRaw.reshape(-1,28,28,1)
X_test_keras = test_imagesRaw.reshape(-1,28,28,1)

# print("X_train_keras",X_train_keras.shape)
# print("X_test_keras",X_test_keras.shape)

train_labels_keras = tf.keras.utils.to_categorical(train_labelsRaw)
test_labels_keras = tf.keras.utils.to_categorical(test_labelsRaw)
# print("train_labels_keras ",train_labels_keras.shape)
# print("test_labels_keras ", test_labels_keras.shape)


# merge datasets

train_images = np.concatenate((X_train_keras,X_train,X_test_keras), axis=0)
# print("new Concatenated train_images ", train_images.shape)
# print("_"*50)

train_labels = np.concatenate((train_labels_keras,y_train,test_labels_keras), axis=0)
# print("new Concatenated train_labels ", train_labels.shape)


#visualize an image

# fig = plt.figure()
# plt.imshow(X_train[6][:,:,0], cmap='gray', interpolation='none')
# plt.xticks([])
# plt.yticks([])


scale = np.max(train_images)
train_images /= scale
X_test /= scale
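# scale is 255.0 for this MNIST-range pixel data, so both the merged training
# images and the Kaggle test set are normalized to [0, 1] here.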

#visualize scales

# print("Max: {}".format(scale))


# X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.10)


# # Here we define the input and output layer sizes
input_size = X_train.shape
n_logits = y_train.shape[1]

# print("Input: {}".format(input_size))
# print("Output: {}".format(n_logits))

num_layers = 8 #hp.Int('num_layers', min_value=2, max_value=16, step=2)

lr = 1e-4 #hp.Choice('learning_rate', [1e-3, 5e-4])
filters = 128 #hp.Int('filters_' + idx, 32, 256, step=32, default=64)
pool_type = 'max' #hp.Choice('pool_' + idx, values=['max', 'avg'])

inputs = layers.Input(shape=(28, 28, 1))
x = inputs
for idx in range(num_layers):
idx = str(idx)
x = layers.Conv2D(filters=filters, kernel_size=3, padding='same',
activation='relu')(x)

# add a pooling layers if needed
if x.shape[1] >= 8:
if pool_type == 'max':
x = layers.MaxPooling2D(2)(x)
elif pool_type == 'avg':
x = layers.AveragePooling2D(2)(x)
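# Pooling only runs while the spatial size stays >= 8, so the 28x28 input is
# downsampled at most twice (28 -> 14 -> 7) before reaching the dense head.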

# My dense layer

x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(n_logits, activation='softmax')(x)

# Build model
model = keras.Model(inputs, outputs)
model.compile(optimizer=Adam(lr),
loss='categorical_crossentropy',
metrics=['accuracy'])




# def build_model(hp):
# """Function that build a TF model based on hyperparameters values.
# Args:
# hp (HyperParameter): hyperparameters values
# Returns:
# Model: Compiled model
# """
# num_layers = hp.Int('num_layers', min_value=2, max_value=16, step=2)

# lr = hp.Choice('learning_rate', [1e-3, 5e-4])

# inputs = layers.Input(shape=(28, 28, 1))
# x = inputs

# for idx in range(num_layers):
# idx = str(idx)

# filters = hp.Int('filters_' + idx, 32, 256, step=32, default=64)
# x = layers.Conv2D(filters=filters, kernel_size=3, padding='same',
# activation='relu')(x)

# # add a pooling layers if needed
# if x.shape[1] >= 8:
# pool_type = hp.Choice('pool_' + idx, values=['max', 'avg'])
# if pool_type == 'max':
# x = layers.MaxPooling2D(2)(x)
# elif pool_type == 'avg':
# x = layers.AveragePooling2D(2)(x)

# # My dense layer

# x = layers.Flatten()(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dropout(0.5)(x)
# outputs = layers.Dense(n_logits, activation='softmax')(x)

# # Build model
# model = keras.Model(inputs, outputs)
# model.compile(optimizer=Adam(lr),
# loss='categorical_crossentropy',
# metrics=['accuracy'])
# return model


# tuner = RandomSearch(
# build_model,
# objective='val_accuracy',
# max_trials=8,
# executions_per_trial=3,
# directory='my_dir',
# project_name='mnist')

# tuner.search_space_summary()


# tuner.search(X_train, y_train,
# epochs=30,
# validation_data=(X_val, y_val))


# model = tuner.get_best_models(num_models=1)[0]
# model.summary()


# # generate predictions
# predictions_vector = model.predict(X_test, verbose=0)
# predictions = np.argmax(predictions_vector,axis=1)

# pd.DataFrame({"ImageId": list(range(1,len(predictions)+1)), "Label": predictions}).to_csv("preds.csv", index=False, header=True)
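The committed script compiles the CNN but leaves every fit/search call commented out. A minimal sketch of how it could be trained on the merged data, reusing the train_test_split imported above; the epoch count and batch size are illustrative, not tuned values from the PR:

# Hypothetical training run on the concatenated MNIST data built above.
X_tr, X_val, y_tr, y_val = train_test_split(train_images, train_labels, test_size=0.10)
model.fit(X_tr, y_tr, epochs=10, batch_size=128, validation_data=(X_val, y_val))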

@@ -0,0 +1 @@
[Added file: a single-line Jupyter notebook (.ipynb JSON) for the digit-recognizer task. Its cells mirror code.py above, but with the Keras Tuner imports, the RandomSearch setup, the tuner.search run, and the prediction/export cells left active rather than commented out.]
11 changes: 7 additions & 4 deletions Agent/workspace/hyperopt/fstp2/code/code.py
@@ -28,9 +28,9 @@

# ### Import Necessary Libraries and Data Sets.

from subprocess import check_output
# from subprocess import check_output

print(check_output(["ls", "../input"]).decode("utf8"))
# print(check_output(["ls", "../input"]).decode("utf8"))

# Import the necessary packages
import numpy as np
@@ -67,7 +67,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

# FILE_PATH = "../data/"
FILE_PATH = "./workspace/hyperopt/fstp2/data/"
@@ -88,8 +88,11 @@
# Combine train and test sets
concat_data = pd.concat((train, test), sort=False).reset_index(drop=True)
# Drop the target "Cover_Type" and Id columns
print(concat_data.columns)

concat_data.drop(["Cover_Type"], axis=1, inplace=True)
concat_data.drop(["Id"], axis=1, inplace=True)
# print(concat_data.columns)
# concat_data.drop(["Id"], axis=1, inplace=True)
# print("Total size is :",concat_data.shape)


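The change comments out the unconditional "Id" drop, since that column is absent in this dataset. A defensive sketch that tolerates either layout, using pandas' errors="ignore":

# Drop the target and Id only when present, so the same code works
# whether or not the CSVs carry an Id column.
concat_data = concat_data.drop(columns=["Cover_Type", "Id"], errors="ignore")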
2 changes: 1 addition & 1 deletion Agent/workspace/hyperopt/higgs-boson2/code/code.py
@@ -26,7 +26,7 @@

FILE_PATH = "./workspace/hyperopt/higgs-boson2/data/"
# FILE_PATH ="../data/"
TARGET = "NObeyesdad"
# TARGET = "NObeyesdad"
Collaborator comment: better to remove this, as it is related to a completely different competition.

submission_path = "best_submission.csv"
RANDOM_SEED = 73

6 changes: 3 additions & 3 deletions Agent/workspace/hyperopt/mercedes2/code/code.py
@@ -14,13 +14,13 @@

# FILE_PATH = "../data/"
FILE_PATH = "./workspace/hyperopt/mercedes2/data/"
TARGET = "NObeyesdad"
# TARGET = "NObeyesdad"
Collaborator comment: better to remove this, as it is related to a completely different competition.

submission_path = "ori_submission.csv"
n_splits = 9
RANDOM_SEED = 73

train = pd.read_csv(FILE_PATH + "train.csv")
test = pd.read_csv(FILE_PATH + "test.csv")
train = pd.read_csv(FILE_PATH + "train.csv.zip")
test = pd.read_csv(FILE_PATH + "test.csv.zip")

y_train = train["y"].values
y_mean = np.mean(y_train)
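The new paths work because pandas infers compression from the file extension. A short sketch of the equivalent explicit form, assuming each archive contains a single CSV:

import pandas as pd

FILE_PATH = "./workspace/hyperopt/mercedes2/data/"
# compression="infer" is the default, so the .zip suffix alone is enough.
train = pd.read_csv(FILE_PATH + "train.csv.zip")
test = pd.read_csv(FILE_PATH + "test.csv.zip", compression="zip")  # explicit equivalent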