Commit c007319

cleanup todo list and query workload from tag table

1 parent a5e82c4 commit c007319

11 files changed: +143 -70 lines

corl/model/tf2/dnc_regressor.py (-5)

@@ -301,8 +301,6 @@ def getModel(self):
             # name='features',
             dtype=tf.float32)
 
-        #TODO add CNN before RNN?
-
         # create sequence of DNC layers
         layer = inputs
         for i in range(self._num_dnc_layers):
@@ -319,11 +317,8 @@ def getModel(self):
                 return_sequences=True if i+1 < self._num_dnc_layers else False,
                 name='rnn_{}'.format(i),
             )
-            # TODO use separate dnc cell for forward & backward pass?
            layer = keras.layers.Bidirectional(layer=rnn, name='bidir_{}'.format(i))(layer)
 
-        # TODO add batch normalization layer before FCN?
-
         if self._dropout_rate > 0:
             layer = keras.layers.AlphaDropout(self._dropout_rate)(layer)
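For context on the code that survives the cleanup: only the last recurrent layer collapses the sequence (return_sequences=False), every layer is wrapped in Bidirectional, and AlphaDropout (designed for selu-activated, self-normalizing networks) follows the stack. A minimal sketch of the same stacking pattern, with a stock LSTM standing in for the repo's DNC layer and placeholder shapes:

import tensorflow as tf
from tensorflow import keras

num_layers = 2     # plays the role of self._num_dnc_layers
units = 64
dropout_rate = 0.1

inputs = keras.Input(shape=(30, 8), dtype=tf.float32)
layer = inputs
for i in range(num_layers):
    rnn = keras.layers.LSTM(
        units,
        # intermediate layers emit full sequences for the next RNN;
        # the last layer emits only the final output vector
        return_sequences=i + 1 < num_layers,
        name='rnn_{}'.format(i),
    )
    layer = keras.layers.Bidirectional(layer=rnn, name='bidir_{}'.format(i))(layer)

if dropout_rate > 0:
    # AlphaDropout preserves mean and variance; intended for selu nets
    layer = keras.layers.AlphaDropout(dropout_rate)(layer)

model = keras.Model(inputs, layer)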

corl/model/tf2/lstm.py (-1)

@@ -314,7 +314,6 @@ def getModel(self):
         return self.model
 
     def compile(self):
-        # TODO study how to use ReduceLROnPlateau and CosineDecayRestarts on adam optimizer
         # decay = tf.keras.experimental.CosineDecayRestarts(self._lr,
         #                                                   self._lr_decay_steps,
         #                                                   t_mul=1.02,
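The removed TODO asked how ReduceLROnPlateau and CosineDecayRestarts interact with Adam; the short answer is that they are alternatives rather than composable: the callback works by assigning a plain float to optimizer.lr, which breaks once the optimizer holds a schedule object. A minimal sketch of the schedule route, with placeholder hyperparameters standing in for self._lr and self._lr_decay_steps:

import tensorflow as tf

# Sketch only: CosineDecayRestarts fed to Adam as a learning-rate schedule.
# The hyperparameter values are placeholders, not the repo's settings.
lr_schedule = tf.keras.experimental.CosineDecayRestarts(
    initial_learning_rate=1e-3,  # plays the role of self._lr
    first_decay_steps=1000,      # plays the role of self._lr_decay_steps
    t_mul=1.02,                  # each restart cycle grows 2% longer
    m_mul=0.95,                  # each restart peak shrinks 5%
    alpha=1e-4)                  # decay floor, as a fraction of the peak
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)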

corl/wc_data/input_fn.py (+70 -2)

@@ -60,6 +60,8 @@
 
 def _init_db(db_pool_size=None, db_host=None, db_port=None, db_pwd=None):
     global cnxpool
+    if cnxpool is not None:
+        return
     print("{} [PID={}]: initializing mysql connection pool...".format(
         strftime("%H:%M:%S"), os.getpid()))
     cnxpool = MySQLConnectionPool(
@@ -624,13 +626,79 @@ def mapfunc(bno):
     }
 
 
+def getWorkloadForPredictionFromTags(actor_pool, start_anchor, stop_anchor, corl_prior, max_step, time_shift, host, port, pwd):
+    ##TODO realize me. query workload from tag table
+    '''
+    Returns list of tuples (code, date, klid)
+    '''
+    global cnxpool
+    _init_db(1, host, port, pwd)
+    qry = (
+        "SELECT "
+        "   partition_name "
+        "FROM "
+        "   information_schema.partitions "
+        "WHERE "
+        "   table_schema = 'secu' "
+        "   AND table_name = 'kline_d_b_lr' "
+    )
+    cond = ''
+    if start_anchor is not None:
+        c1, k1 = start_anchor
+        cond += '''
+            and (
+                t.code > '{}'
+                or (t.code = '{}' and t.klid >= {})
+            )
+        '''.format(c1, c1, k1)
+    if stop_anchor is not None:
+        c2, k2 = stop_anchor
+        cond += '''
+            and (
+                t.code < '{}'
+                or (t.code = '{}' and t.klid < {})
+            )
+        '''.format(c2, c2, k2)
+    cnx = cnxpool.get_connection()
+    cursor = None
+    try:
+        print('{} querying partitions for kline_d_b_lr'.format(strftime("%H:%M:%S")))
+        cursor = cnx.cursor()
+        cursor.execute(qry)
+        rows = cursor.fetchall()
+        total = cursor.rowcount
+        print('{} #partitions: {}'.format(strftime("%H:%M:%S"), total))
+    except:
+        print(sys.exc_info()[0])
+        raise
+    finally:
+        if cursor is not None:
+            cursor.close()
+        cnx.close()
+
+    tasks = actor_pool.map(
+        lambda a, part: a.get_wcc_infer_work_request.remote(part, cond),
+        rows
+    )
+
+    # remove empty sublists
+    workloads = [t for t in list(tasks) if t]
+    # flatten the list and remove empty tuples
+    workloads = [val for sublist in workloads for val in sublist if val]
+    # sort by code and klid in ascending order
+    workloads.sort(key=lambda tup: (tup[0], tup[3]))
+
+    print('{} total workloads: {}'.format(
+        strftime("%H:%M:%S"), len(workloads)))
+
+    return workloads
+
 def getWorkloadForPrediction(actor_pool, start_anchor, stop_anchor, corl_prior, max_step, time_shift, host, port, pwd):
     '''
     Returns list of tuples (code, date, klid)
     '''
     global cnxpool
-    if cnxpool is None:
-        _init_db(1, host, port, pwd)
+    _init_db(1, host, port, pwd)
     qry = (
         "SELECT "
         "   partition_name "

corl/wcc/Dockerfile (+14 -1)

@@ -1,7 +1,20 @@
 FROM tensorflow/tensorflow:2.4.1-gpu
 
-COPY requirements.txt .
+# set timezone to Asia/Shanghai
+#ENV TZ Asia/Shanghai
 
+#RUN echo $TZ > /etc/timezone && \
+#    apt-get update && apt-get install -y tzdata && \
+#    rm /etc/localtime && \
+#    ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
+#    dpkg-reconfigure -f noninteractive tzdata && \
+#    apt-get clean
+
+# install timedatectl & dependencies, and enable time synchronization
+#RUN apt-get install -y systemd dbus
+#RUN timedatectl set-ntp on
+
+# upgrade pip and install requirements
 RUN python3 -m pip install --upgrade pip
 RUN pip install -r requirements.txt;
 RUN rm -rf requirements.txt

corl/wcc/worker.py (+43 -42)

@@ -12,7 +12,7 @@
 from mysql.connector.pooling import MySQLConnectionPool
 from corl.wc_data.series import DataLoader, getSeries_v2
 from corl.wc_test.test27_mdnc import create_regressor
-from corl.wc_data.input_fn import getWorkloadForPrediction
+from corl.wc_data.input_fn import getWorkloadForPrediction, getWorkloadForPredictionFromTags
 
 REGRESSOR = create_regressor()
 cnxpool = None
@@ -36,6 +36,18 @@
     %s,%s,%s)
 """
 
+KLINE_TAG_UPSERT = """
+    INSERT INTO `secu`.`kline_d_b_lr_tags`
+        (`code`,`date`,`klid`,`tags`,`udate`,`utime`)
+    VALUES
+        (%s,%s,%s,%s,%s,%s)
+    ON DUPLICATE KEY
+    UPDATE
+        `tags`=concat(`tags`,';',VALUES(`tags`)),
+        `udate`=VALUES(`udate`),
+        `utime`=VALUES(`utime`)
+"""
+
 
 def _init(db_pool_size=None, db_host=None, db_port=None, db_pwd=None):
     # FIXME too many db initialization message in the log and 'aborted clients' in mysql dashboard
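The upsert appends new tags to an existing row's `tags` column with a ';' separator rather than overwriting it. A minimal sketch of how parameter rows line up with this statement — assuming, as in the _save_prediction hunk below, that each bucket entry starts with (code, date, klid, ...) and ends with (udate, utime):

# Sketch: parameter rows for KLINE_TAG_UPSERT via cursor.executemany().
# The bucket row here is fabricated for illustration; only the tuple
# layout (code, date, klid, ..., udate, utime) is taken from the commit.
bucket = [('000001', '2021-03-01', 4321, 0.87, '2021-03-02', '09:30:00')]
rows = [(t[0], t[1], t[2], 'wcc_predict', t[-2], t[-1]) for t in bucket]
# The first insert creates tags='wcc_predict'; a second run on the same
# (code, date, klid) key yields tags='wcc_predict;wcc_predict' -- concat()
# does not deduplicate, so repeated runs grow the tags string.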
@@ -54,13 +66,6 @@ def _init(db_pool_size=None, db_host=None, db_port=None, db_pwd=None):
         # ssl_ca='',
         # use_pure=True,
         connect_timeout=90000)
-    # ray.init(
-    #     num_cpus=psutil.cpu_count(logical=False),
-    #     webui_host='127.0.0.1',  # TODO need a different port?
-    #     memory=2 * 1024 * 1024 * 1024,  # 2G
-    #     object_store_memory=512 * 1024 * 1024,  # 512M
-    #     driver_object_store_memory=256 * 1024 * 1024  # 256M
-    # )
 
 
 def _get_rcodes_for(code, table, dates):
@@ -206,6 +211,8 @@ def _save_prediction(code=None, klid=None, date=None, rcodes=None, top_k=None, p
     try:
         cursor = cnx.cursor()
         cursor.executemany(WCC_INSERT, bucket)
+        cursor.executemany(KLINE_TAG_UPSERT,
+                           [(t[0], t[1], t[2], 'wcc_predict', t[-2], t[-1]) for t in bucket])
         cnx.commit()
     except:
         print(sys.exc_info()[0])
@@ -352,15 +359,11 @@ def _predict(model_path, max_batch_size, data_queue, infer_queue, args):
     def predict():
         try:
             next_work = data_queue.get()
+
             if isinstance(next_work, str) and next_work == 'done':
-                if data_queue.empty():
-                    infer_queue.put('done')
-                    return True
-                else:
-                    print('{} warning, data_queue is still not empty when ''done'' signal is received. qsize: {}'.format(
-                        strftime("%H:%M:%S"), data_queue.size()))
-                    data_queue.put_nowait('done')
-                    return False
+                infer_queue.put('done')
+                return True
+
             batch = next_work['batch']
             p = model.predict(batch, batch_size=max_batch_size)
             p = np.squeeze(p)
@@ -399,7 +402,7 @@ def predict():
         if c == 2000:
             print('{} predict average: {}'.format(
                 strftime("%H:%M:%S"), elapsed/1000), file=sys.stderr)
-        predict()
+        done = predict()
         c += 1
 
     return done
@@ -411,44 +414,42 @@ def _save_infer_result(top_k, shared_args, infer_queue):
     db_host = shared_args['db_host']
     db_port = shared_args['db_port']
     db_pwd = shared_args['db_pwd']
+    parallel = shared_args['args'].parallel
+
     if cnxpool is None:
         _init(1, db_host, db_port, db_pwd)
 
     def _inner_work():
        # poll work request from 'infer_queue' for saving inference result and handle persistence
         if infer_queue.empty():
             sleep(5)
-            return False
+            return 0
         try:
             next_result = infer_queue.get()
+
             if isinstance(next_result, str) and next_result == 'done':
-                if infer_queue.empty():
-                    # flush bucket
-                    _save_prediction()
-                    return True
-                else:
-                    print('{} warning, infer_queue is still not empty when ''done'' signal is received. qsize: {}'.format(
-                        strftime("%H:%M:%S"), infer_queue.size()))
-                    infer_queue.put_nowait('done')
-            else:
-                result = next_result['result']
-                rcodes = next_result['rcodes']
-                code = next_result['code']
-                date = next_result['date']
-                klid = next_result['klid']
-                udate = next_result['udate']
-                utime = next_result['utime']
-                _save_prediction(code, klid, date, rcodes,
-                                 top_k, result, udate, utime)
+                # flush bucket
+                _save_prediction()
+                return 1
+
+            result = next_result['result']
+            rcodes = next_result['rcodes']
+            code = next_result['code']
+            date = next_result['date']
+            klid = next_result['klid']
+            udate = next_result['udate']
+            utime = next_result['utime']
+            _save_prediction(code, klid, date, rcodes,
+                             top_k, result, udate, utime)
         except Exception:
             sleep(2)
             pass
-
-        return False
 
-    done = False
-    while not done:
-        done = _inner_work()
+        return 0
+
+    done = 0
+    while done < parallel:
+        done += _inner_work()
 
     cnxpool._remove_connections()
 
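The shutdown protocol changes here: the old code required the queue to be empty when a 'done' marker arrived and re-queued the marker otherwise, while the new code has each predictor forward exactly one 'done' and lets the consumer count markers until all `parallel` producers have signed off. The hunk above also fixes predict()'s return value being discarded. A minimal, self-contained sketch of this counting pattern, with hypothetical names and threads standing in for the Ray workers:

import queue
import threading

NUM_PRODUCERS = 4  # plays the role of shared_args['args'].parallel

q = queue.Queue()

def producer(pid):
    # each producer enqueues its work items, then exactly one 'done' marker
    for i in range(3):
        q.put((pid, i))
    q.put('done')

def consumer():
    done = 0
    while done < NUM_PRODUCERS:   # stop only after every producer signed off
        item = q.get()
        if item == 'done':
            done += 1             # count markers; no q.empty() races involved
            continue
        print('saving result', item)

threads = [threading.Thread(target=producer, args=(i,)) for i in range(NUM_PRODUCERS)]
for t in threads:
    t.start()
consumer()
for t in threads:
    t.join()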

@@ -477,7 +478,7 @@ def predict_wcc(num_actors, min_rcode, max_batch_size, model_path, top_k, shared
         shared_args) for i in range(num_actors)]
     )
 
-    work = getWorkloadForPrediction(actor_pool,
+    work = getWorkloadForPredictionFromTags(actor_pool,
                                     start_anchor,
                                     stop_anchor,
                                     corl_prior,

pstk/data/data15.py (+14 -13)

@@ -62,19 +62,20 @@ def _getIndex():
 
 def _init(db_pool_size=None, db_host=None, db_port=None, db_pwd=None):
     global cnxpool
-    print("{} [PID={}]: initializing mysql connection pool...".format(
-        strftime("%H:%M:%S"), os.getpid()))
-    cnxpool = MySQLConnectionPool(
-        pool_name="dbpool",
-        pool_size=db_pool_size or 5,
-        host=db_host or '127.0.0.1',
-        port=db_port or 3306,
-        user='mysql',
-        database='secu',
-        password=db_pwd or '123456',
-        # ssl_ca='',
-        # use_pure=True,
-        connect_timeout=90000)
+    if cnxpool is None:
+        print("{} [PID={}]: initializing mysql connection pool...".format(
+            strftime("%H:%M:%S"), os.getpid()))
+        cnxpool = MySQLConnectionPool(
+            pool_name="dbpool",
+            pool_size=db_pool_size or 5,
+            host=db_host or '127.0.0.1',
+            port=db_port or 3306,
+            user='mysql',
+            database='secu',
+            password=db_pwd or '123456',
+            # ssl_ca='',
+            # use_pure=True,
+            connect_timeout=90000)
     ray.init(
         num_cpus=psutil.cpu_count(logical=False),
         webui_host='127.0.0.1',
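This is the same idempotence guard added to _init_db in corl/wc_data/input_fn.py above, written as a wrapping if rather than an early return. A condensed, runnable sketch of the behavior, with a plain object standing in for the real MySQLConnectionPool:

cnxpool = None
init_count = 0

def _init():
    # guarded init: only the first call per process creates the pool,
    # so call sites no longer need their own `if cnxpool is None` checks
    global cnxpool, init_count
    if cnxpool is None:
        init_count += 1
        cnxpool = object()  # stand-in for MySQLConnectionPool(...)

_init()
_init()
assert init_count == 1  # the second call was a no-op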

pstk/model/model10.py (-1)

@@ -59,7 +59,6 @@ def logits(self):
     @staticmethod
     def tcn(self, inputs):
         # Temporal Convolutional Network
-        #TODO: implement me
         return None
 
 
pstk/model/model11.py (-1)

@@ -88,7 +88,6 @@ def fcn(self, inputs):
     @staticmethod
     def rnn(self, inputs):
         # Deep Residual RNN
-        # TODO: try MultiRNNCell of MultiRNNCell, wrapped in a residual wrapper
         cells = []
         feat_size = int(inputs.get_shape()[-1])
         # p = int(round(self._rnn_layers ** 0.5))

pstk/model/model3.py

-2
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,9 @@ def prediction(self):
174174
@staticmethod
175175
def rnn(self, input):
176176
# Recurrent network.
177-
# TODO add tf.contrib.rnn.ConvLSTMCell?
178177
step = int(input.get_shape()[1])
179178
feat = int(input.get_shape()[2])
180179
c = feat // self._input_width # channel
181-
# TODO step & width must equal?
182180
input = tf.reshape(input, [-1, step, self._input_width, c])
183181
clc = tf.contrib.rnn.ConvLSTMCell(
184182
conv_ndims=1,

pstk/model/wavenet/model.py (+1 -1)

@@ -337,7 +337,7 @@ def _create_dilation_layer(self, input_batch, layer_index, dilation,
 
     def _generator_conv(self, input_batch, state_batch, weights):
         '''Perform convolution for a single convolutional processing step.'''
-        # TODO generalize to filter_width > 2
+        # TD: generalize to filter_width > 2
         past_weights = weights[0, :, :]
         curr_weights = weights[1, :, :]
         output = tf.matmul(state_batch, past_weights) + tf.matmul(

test.py (+1 -1)

@@ -363,7 +363,7 @@ def testTasklist():
     # print('#talst: {}'.format(len(talst)))
     # for i in range(50):
     #     print_talst_element(i, talst)
-    # TODO test efficient status update
+    # test efficient status update
     # for i in range(50):
     #     delayed_write_talst(i, talst)
     # print("job done")
