import os
import sys

import psutil
import ray

import tensorflow as tf

from time import strftime
from mysql.connector.pooling import MySQLConnectionPool

k_cols = ["lr"]

# Module-level configuration, populated by getInputs() before the
# datasets are built.
idxlst = None
feat_cols = []
parallel = None
time_shift = None
max_step = None
_prefetch = None
db_pool_size = None
db_host = None
db_port = None
db_pwd = None
shared_args = None
check_input = False

# MySQL connection pool, created by _init().
cnxpool = None

# Looks up the max batch number recorded in fs_stats for the wcc_trn table.
maxbno_query = ("SELECT "
                "    vmax "
                "FROM "
                "    fs_stats "
                "WHERE "
                "    method = 'standardization' "
                "    AND tab = 'wcc_trn' "
                "    AND fields = %s ")


def _getIndex():
    '''
    Returns the set of index codes from the idxlst table, cached after the
    first load.
    '''
    global idxlst
    if idxlst is not None:
        return idxlst
    print("{} loading index...".format(strftime("%H:%M:%S")))
    cnx = cnxpool.get_connection()
    try:
        cursor = cnx.cursor(buffered=True)
        # Case-sensitive collation, so codes differing only in case are kept.
        query = 'SELECT DISTINCT code COLLATE utf8mb4_0900_as_cs FROM idxlst'
        cursor.execute(query)
        rows = cursor.fetchall()
        cursor.close()
        idxlst = {c[0] for c in rows}
        return idxlst
    except Exception:
        print(sys.exc_info()[0])
        raise
    finally:
        cnx.close()

def _init(db_pool_size=None, db_host=None, db_port=None, db_pwd=None):
    global cnxpool
    print("{} [PID={}]: initializing mysql connection pool...".format(
        strftime("%H:%M:%S"), os.getpid()))
    cnxpool = MySQLConnectionPool(
        pool_name="dbpool",
        pool_size=db_pool_size or 5,
        host=db_host or '127.0.0.1',
        port=db_port or 3306,
        user='mysql',
        database='secu',
        password=db_pwd or '123456',
        # ssl_ca='',
        # use_pure=True,
        connect_timeout=90000)
    # Note: webui_host, memory and driver_object_store_memory follow the
    # pre-1.0 ray.init() API.
    ray.init(
        num_cpus=psutil.cpu_count(logical=False),
        webui_host='127.0.0.1',
        memory=4 * 1024 * 1024 * 1024,  # 4G
        object_store_memory=4 * 1024 * 1024 * 1024,  # 4G
        driver_object_store_memory=256 * 1024 * 1024  # 256M
    )

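# A quick connectivity probe (illustrative sketch only, not part of the
# original module); it assumes _init() has already been called:
#
#     cnx = cnxpool.get_connection()
#     try:
#         cur = cnx.cursor()
#         cur.execute('SELECT 1')
#         assert cur.fetchone()[0] == 1
#         cur.close()
#     finally:
#         cnx.close()  # returns the connection to the pool
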
def _getDataSetMeta(flag):
    global cnxpool
    cnx = cnxpool.get_connection()
    max_bno, batch_size = None, None
    try:
        print('{} querying max batch no for {} set...'.format(
            strftime("%H:%M:%S"), flag))
        cursor = cnx.cursor()
        cursor.execute(maxbno_query, (flag + "_BNO", ))
        row = cursor.fetchone()
        max_bno = int(row[0])
        print('{} max batch no: {}'.format(strftime("%H:%M:%S"), max_bno))
        query = ("SELECT "
                 "    COUNT(*) "
                 "FROM "
                 "    wcc_trn "
                 "WHERE "
                 "    flag = %s "
                 "    AND bno = 1 ")
        cursor.execute(query, (flag, ))
        row = cursor.fetchone()
        batch_size = row[0]
        print('{} batch size: {}'.format(strftime("%H:%M:%S"), batch_size))
        if batch_size == 0:
            print('{} no more data for {}.'.format(strftime("%H:%M:%S"),
                                                   flag.lower()))
            cursor.close()
            return None, None
        cursor.close()
    except Exception:
        print(sys.exc_info()[0])
        raise
    finally:
        cnx.close()
    return max_bno, batch_size

def getInputs(start_bno=0,
              shift=0,
              cols=None,
              step=30,
              cores=psutil.cpu_count(logical=False),
              pfetch=2,
              pool=None,
              host=None,
              port=None,
              pwd=None,
              vset=None,
              check=False):
    """Input function for the stock trend prediction dataset.

    Returns:
        A dictionary of the following:
        'train': dataset for training
        'test': dataset for test/validation
        'train_batches': total number of batches in the training set
        'test_batches': total number of batches in the test set
        'train_batch_size': size of a single training batch
        'test_batch_size': size of a single test batch
    """
    # Create dataset for training
    global feat_cols, max_step, time_shift
    global parallel, _prefetch, db_pool_size
    global db_host, db_port, db_pwd, shared_args, check_input
    time_shift = shift
    feat_cols = cols or k_cols
    max_step = step
    # Feature vector width: 2 values per column for each of the
    # (time_shift + 1) time offsets.
    feat_size = len(feat_cols) * 2 * (time_shift + 1)
    parallel = cores
    _prefetch = pfetch
    db_pool_size = pool
    db_host = host
    db_port = port
    db_pwd = pwd
    check_input = check
    print("{} Using parallel: {}, prefetch: {}, db_host: {}, port: {}".format(
        strftime("%H:%M:%S"), parallel, _prefetch, db_host, db_port))
    _init(db_pool_size, db_host, db_port, db_pwd)
    # _getFtQuery, _loadTrainingData_v2 and _loadTestSet_v2 are expected to
    # be defined elsewhere.
    qk, qd, qd_idx, qk2 = _getFtQuery()
    # Share read-only arguments with ray workers via the object store.
    shared_args = ray.put({
        'max_step': max_step,
        'time_shift': time_shift,
        'qk': qk,
        'qk2': qk2,
        'qd': qd,
        'qd_idx': qd_idx,
        'index_list': _getIndex(),
        'db_host': db_host,
        'db_port': db_port,
        'db_pwd': db_pwd
    })
    # Query the max batch number from wcc_trn and enumerate the batch
    # numbers between start_bno and that maximum.
    train_batches, train_batch_size = _getDataSetMeta("TR")
    if train_batches is None:
        return None
    bnums = list(range(start_bno, train_batches + 1))

    def mapfunc(bno):
        # tf.numpy_function erases static shape information, so the shapes
        # are restored manually below.
        feat, corl = tf.numpy_function(func=_loadTrainingData_v2,
                                       inp=[bno],
                                       Tout=[tf.float32, tf.float32])
        feat.set_shape((None, max_step, feat_size))
        corl.set_shape((None, 1))
        return feat, corl

    ds_train = tf.data.Dataset.from_tensor_slices(bnums).map(
        mapfunc,
        # num_parallel_calls=tf.data.experimental.AUTOTUNE
        num_parallel_calls=parallel
    ).prefetch(
        # tf.data.experimental.AUTOTUNE
        _prefetch
    )

    # Create dataset for testing. The test set is loaded eagerly, cached,
    # and repeated so it can serve as validation data on every epoch.
    test_batches, test_batch_size = _getDataSetMeta("TS")
    if test_batches is None:
        return None
    ds_test = tf.data.Dataset.from_tensor_slices(
        _loadTestSet_v2(step, test_batches + 1,
                        vset)).batch(test_batch_size).cache().repeat()

    return {
        'train': ds_train,
        'test': ds_test,
        'train_batches': train_batches,
        'test_batches': test_batches,
        'train_batch_size': train_batch_size,
        'test_batch_size': test_batch_size
    }
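

if __name__ == '__main__':
    # A minimal smoke test (illustrative sketch, not part of the original
    # module): build the datasets with default settings and print their
    # metadata. It assumes a reachable MySQL instance with the expected
    # schema; the shift/step values here are arbitrary.
    data = getInputs(shift=4, step=30)
    if data is None:
        print('no training data available')
    else:
        print('train batches: {}, batch size: {}'.format(
            data['train_batches'], data['train_batch_size']))
        print('test batches: {}, batch size: {}'.format(
            data['test_batches'], data['test_batch_size']))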