[UPDATE] Fix some script errors #91

Open
wants to merge 4 commits into base: java
8 changes: 2 additions & 6 deletions Agent/workspace/hyperopt/abalone/code/code.py
@@ -105,19 +105,15 @@

# List of models to evaluate
catboost_model = CatBoostRegressor(random_state=1, verbose=False)
# lgbm_model = LGBMRegressor(verbose=-1, random_state=1)
# xgb_model = XGBRegressor(verbose=0, random_state=1, enable_categorical=True)


# # Fit the models on the training data
# catboost_model.fit(X_train, y_train)
# lgbm_model.fit(X_train, y_train)
# xgb_model.fit(X_train, y_train)


# # Evaluate the models
# catboost_preds = catboost_model.predict(X_val)
# lgbm_preds = lgbm_model.predict(X_val)
# xgb_preds = xgb_model.predict(X_val)


# final_preds = np.round((catboost_preds + lgbm_preds + xgb_preds) / 3).astype("int")

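For context, CatBoost is the only estimator left active after this cleanup. A minimal sketch of the resulting single-model path, assuming X_train, y_train, and X_val are defined earlier in code.py as in the rest of the script:

import numpy as np
from catboost import CatBoostRegressor

# Single active model; the LGBM/XGB branches remain commented out.
catboost_model = CatBoostRegressor(random_state=1, verbose=False)
catboost_model.fit(X_train, y_train)
catboost_preds = catboost_model.predict(X_val)
final_preds = np.round(catboost_preds).astype("int")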
13 changes: 7 additions & 6 deletions Agent/workspace/hyperopt/abalone2/code/code.py
@@ -33,7 +33,7 @@

np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)
FILE_PATH = "./workspace/hyperopt/abalone/data/"
FILE_PATH = "./workspace/hyperopt/abalone2/data/"
# FILE_PATH="../data/"
submmision_file = "submission.csv"
train = pd.read_csv(FILE_PATH + "train.csv")
@@ -317,14 +317,15 @@ def log_transformation(data, columns):
"xgboost_weight": 0.48550637896530635,
"catboost_weight": 4.189724537494019,
}

voting_regressor = VotingRegressor(
estimators=cv_estimators,
weights=[
weights_list=[
weight_best_params["lgbm_weight"],
weight_best_params["xgboost_weight"],
weight_best_params["catboost_weight"]
]

]
voting_regressor = VotingRegressor(
estimators=cv_estimators,
weights=weights_list
)

# cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_SEED)
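The fix names the weights list before constructing the ensemble, replacing the earlier malformed call. A minimal sketch of the corrected pattern, assuming cv_estimators is a list of (name, estimator) pairs and weight_best_params is the tuned dict above:

from sklearn.ensemble import VotingRegressor

weights_list = [
    weight_best_params["lgbm_weight"],
    weight_best_params["xgboost_weight"],
    weight_best_params["catboost_weight"],
]
voting_regressor = VotingRegressor(estimators=cv_estimators, weights=weights_list)
# voting_regressor.fit(X_train, y_train)
# preds = voting_regressor.predict(X_test)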
8 changes: 4 additions & 4 deletions Agent/workspace/hyperopt/bank-churn2/code/code.py
@@ -110,8 +110,8 @@ def plot_kde_for_all_columns(df):
# Below are the parameters for xgboost.

xgb_params = {"booster": "gbtree",
"lambda": 0.8611971458776956,
"alpha": 3.3684132992886347e-07,
"reg_lambda": 0.8611971458776956,
"reg_alpha": 3.3684132992886347e-07,
"max_depth": 3,
"eta": 0.17374299923922656,
"gamma": 1.2505690952357777e-06,
@@ -144,9 +144,9 @@ def plot_kde_for_all_columns(df):


# ## Voting Ensemble

weight_list=[0.2,0.4,0.4]
voter = VotingClassifier(estimators=[("m1", xgb_model), ("m2", lgbm_model), ("m3", cb_model)], voting="soft",
weights=[0.2, 0.4, 0.4])
weights=weight_list)
# voter.fit(X,y)


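Both fixes target the scikit-learn-style APIs: the XGBoost sklearn wrapper takes reg_lambda/reg_alpha (bare "lambda"/"alpha" are native-API parameter names), and the voting weights are now bound to a name before use. A hedged sketch of the corrected usage; lgbm_model and cb_model stand in for the LightGBM and CatBoost classifiers defined elsewhere in the script:

from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# sklearn-wrapper names for the L2/L1 penalties.
xgb_model = XGBClassifier(
    booster="gbtree",
    reg_lambda=0.8611971458776956,
    reg_alpha=3.3684132992886347e-07,
    max_depth=3,
)

weight_list = [0.2, 0.4, 0.4]
voter = VotingClassifier(
    estimators=[("m1", xgb_model), ("m2", lgbm_model), ("m3", cb_model)],
    voting="soft",
    weights=weight_list,
)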
219 changes: 219 additions & 0 deletions Agent/workspace/hyperopt/digit-recognizer/code/code.py
@@ -0,0 +1,219 @@
#!/usr/bin/env python
# coding: utf-8

# get_ipython().system('pip install keras-tuner')


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# from kerastuner import RandomSearch

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# from keras.callbacks import ReduceLROnPlateau
# from keras.optimizers import RMSprop
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# from keras.datasets import mnist  # redundant: mnist is already imported from tensorflow.keras above
FILE_PATH = "./workspace/hyperopt/digit-recognizer/data/"

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
# for filename in filenames:
# print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.


# Load the data
train = pd.read_csv(FILE_PATH+'train.csv')
labels = train.iloc[:,0].values.astype('int32')

X_train = (train.iloc[:,1:].values).astype('float32')
X_test = (pd.read_csv(FILE_PATH+'test.csv').values).astype('float32')

#reshape into images
X_train = X_train.reshape(-1,28,28,1)
X_test = X_test.reshape(-1,28,28,1)

# one hot encoding
y_train = tf.keras.utils.to_categorical(labels)

# print("Check data")
# print(labels)
# print(X_train[0].shape)
# print(y_train)


# Load Data from Keras MNIST
(train_imagesRaw, train_labelsRaw), (test_imagesRaw, test_labelsRaw) = mnist.load_data()


#reshape into images
X_train_keras = train_imagesRaw.reshape(-1,28,28,1)
X_test_keras = test_imagesRaw.reshape(-1,28,28,1)

# print("X_train_keras",X_train_keras.shape)
# print("X_test_keras",X_test_keras.shape)

train_labels_keras = tf.keras.utils.to_categorical(train_labelsRaw)
test_labels_keras = tf.keras.utils.to_categorical(test_labelsRaw)
# print("train_labels_keras ",train_labels_keras.shape)
# print("test_labels_keras ", test_labels_keras.shape)


# merge datasets

train_images = np.concatenate((X_train_keras,X_train,X_test_keras), axis=0)
# print("new Concatenated train_images ", train_images.shape)
# print("_"*50)

train_labels = np.concatenate((train_labels_keras,y_train,test_labels_keras), axis=0)
# print("new Concatenated train_labels ", train_labels.shape)


#visualize an image

# fig = plt.figure()
# plt.imshow(X_train[6][:,:,0], cmap='gray', interpolation='none')
# plt.xticks([])
# plt.yticks([])


scale = np.max(train_images)
train_images /= scale
X_test /= scale
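# scale is 255.0 for this MNIST-range pixel data, so both the merged training
# images and the Kaggle test set are normalized to [0, 1] here.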

#visualize scales

# print("Max: {}".format(scale))


# X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.10)


# # Here we define the input and output layer sizes
input_size = X_train.shape
n_logits = y_train.shape[1]

# print("Input: {}".format(input_size))
# print("Output: {}".format(n_logits))

num_layers = 8 #hp.Int('num_layers', min_value=2, max_value=16, step=2)

lr = 1e-4 #hp.Choice('learning_rate', [1e-3, 5e-4])
filters = 128 #hp.Int('filters_' + idx, 32, 256, step=32, default=64)
pool_type = 'max' #hp.Choice('pool_' + idx, values=['max', 'avg'])

inputs = layers.Input(shape=(28, 28, 1))
x = inputs
for idx in range(num_layers):
idx = str(idx)
x = layers.Conv2D(filters=filters, kernel_size=3, padding='same',
activation='relu')(x)

# add a pooling layers if needed
if x.shape[1] >= 8:
if pool_type == 'max':
x = layers.MaxPooling2D(2)(x)
elif pool_type == 'avg':
x = layers.AveragePooling2D(2)(x)
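# Pooling only runs while the spatial size stays >= 8, so the 28x28 input is
# downsampled at most twice (28 -> 14 -> 7) before reaching the dense head.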

# My dense layer

x = layers.Flatten()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(n_logits, activation='softmax')(x)

# Build model
model = keras.Model(inputs, outputs)
model.compile(optimizer=Adam(lr),
loss='categorical_crossentropy',
metrics=['accuracy'])




# def build_model(hp):
# """Function that build a TF model based on hyperparameters values.
# Args:
# hp (HyperParameter): hyperparameters values
# Returns:
# Model: Compiled model
# """
# num_layers = hp.Int('num_layers', min_value=2, max_value=16, step=2)

# lr = hp.Choice('learning_rate', [1e-3, 5e-4])

# inputs = layers.Input(shape=(28, 28, 1))
# x = inputs

# for idx in range(num_layers):
# idx = str(idx)

# filters = hp.Int('filters_' + idx, 32, 256, step=32, default=64)
# x = layers.Conv2D(filters=filters, kernel_size=3, padding='same',
# activation='relu')(x)

# # add a pooling layers if needed
# if x.shape[1] >= 8:
# pool_type = hp.Choice('pool_' + idx, values=['max', 'avg'])
# if pool_type == 'max':
# x = layers.MaxPooling2D(2)(x)
# elif pool_type == 'avg':
# x = layers.AveragePooling2D(2)(x)

# # My dense layer

# x = layers.Flatten()(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dense(256, activation='relu')(x)
# x = layers.Dropout(0.5)(x)
# outputs = layers.Dense(n_logits, activation='softmax')(x)

# # Build model
# model = keras.Model(inputs, outputs)
# model.compile(optimizer=Adam(lr),
# loss='categorical_crossentropy',
# metrics=['accuracy'])
# return model


# tuner = RandomSearch(
# build_model,
# objective='val_accuracy',
# max_trials=8,
# executions_per_trial=3,
# directory='my_dir',
# project_name='mnist')

# tuner.search_space_summary()


# tuner.search(X_train, y_train,
# epochs=30,
# validation_data=(X_val, y_val))


# model = tuner.get_best_models(num_models=1)[0]
# model.summary()


# # generate predictions
# predictions_vector = model.predict(X_test, verbose=0)
# predictions = np.argmax(predictions_vector,axis=1)

# pd.DataFrame({"ImageId": list(range(1,len(predictions)+1)), "Label": predictions}).to_csv("preds.csv", index=False, header=True)
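The committed script compiles the CNN but leaves every fit/search call commented out. A minimal sketch of how it could be trained on the merged data, reusing the train_test_split imported above; the epoch count and batch size are illustrative, not tuned values from the PR:

# Hypothetical training run on the concatenated MNIST data built above.
X_tr, X_val, y_tr, y_val = train_test_split(train_images, train_labels, test_size=0.10)
model.fit(X_tr, y_tr, epochs=10, batch_size=128, validation_data=(X_val, y_val))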

@@ -0,0 +1 @@
[Added file: a single-line Jupyter notebook (.ipynb JSON) for the digit-recognizer task. Its cells mirror code.py above, but with the Keras Tuner imports, the RandomSearch setup, the tuner.search run, and the prediction/export cells left active rather than commented out.]
11 changes: 7 additions & 4 deletions Agent/workspace/hyperopt/fstp2/code/code.py
@@ -28,9 +28,9 @@

# ### Import Necessary Libraries and Data Sets.

from subprocess import check_output
# from subprocess import check_output

print(check_output(["ls", "../input"]).decode("utf8"))
# print(check_output(["ls", "../input"]).decode("utf8"))

# Import the necessary packages
import numpy as np
@@ -67,7 +67,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

# FILE_PATH = "../data/"
FILE_PATH = "./workspace/hyperopt/fstp2/data/"
@@ -88,8 +88,11 @@
# Combine train and test sets
concat_data = pd.concat((train, test), sort=False).reset_index(drop=True)
# Drop the target "Cover_Type" and Id columns
print(concat_data.columns)

concat_data.drop(["Cover_Type"], axis=1, inplace=True)
concat_data.drop(["Id"], axis=1, inplace=True)
# print(concat_data.columns)
# concat_data.drop(["Id"], axis=1, inplace=True)
# print("Total size is :",concat_data.shape)


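The change comments out the unconditional "Id" drop, since that column is absent in this dataset. A defensive sketch that tolerates either layout, using pandas' errors="ignore":

# Drop the target and Id only when present, so the same code works
# whether or not the CSVs carry an Id column.
concat_data = concat_data.drop(columns=["Cover_Type", "Id"], errors="ignore")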
2 changes: 1 addition & 1 deletion Agent/workspace/hyperopt/higgs-boson2/code/code.py
@@ -26,7 +26,7 @@

FILE_PATH = "./workspace/hyperopt/higgs-boson2/data/"
# FILE_PATH ="../data/"
TARGET = "NObeyesdad"
# TARGET = "NObeyesdad"
Collaborator comment: better to remove this, as it is related to a completely different competition.

submission_path = "best_submission.csv"
RANDOM_SEED = 73

6 changes: 3 additions & 3 deletions Agent/workspace/hyperopt/mercedes2/code/code.py
@@ -14,13 +14,13 @@

# FILE_PATH = "../data/"
FILE_PATH = "./workspace/hyperopt/mercedes2/data/"
TARGET = "NObeyesdad"
# TARGET = "NObeyesdad"
Collaborator comment: better to remove this, as it is related to a completely different competition.

submission_path = "ori_submission.csv"
n_splits = 9
RANDOM_SEED = 73

train = pd.read_csv(FILE_PATH + "train.csv")
test = pd.read_csv(FILE_PATH + "test.csv")
train = pd.read_csv(FILE_PATH + "train.csv.zip")
test = pd.read_csv(FILE_PATH + "test.csv.zip")

y_train = train["y"].values
y_mean = np.mean(y_train)
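The new paths work because pandas infers compression from the file extension. A short sketch of the equivalent explicit form, assuming each archive contains a single CSV:

import pandas as pd

FILE_PATH = "./workspace/hyperopt/mercedes2/data/"
# compression="infer" is the default, so the .zip suffix alone is enough.
train = pd.read_csv(FILE_PATH + "train.csv.zip")
test = pd.read_csv(FILE_PATH + "test.csv.zip", compression="zip")  # explicit equivalent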