From f13e3bc4b6bb42be4717232612d0d41c94ecdf23 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 14:39:28 +0100 Subject: [PATCH 01/20] Write tensorboard log files. --- rasa/core/policies/ted_policy.py | 10 ++++- rasa/nlu/classifiers/diet_classifier.py | 10 ++++- rasa/nlu/selectors/response_selector.py | 4 ++ rasa/utils/tensorflow/constants.py | 2 + rasa/utils/tensorflow/models.py | 56 ++++++++++++++++++++++--- 5 files changed, 75 insertions(+), 7 deletions(-) diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index bd7b8625899b..20d5893bfe7c 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -58,6 +58,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, ) @@ -169,6 +170,9 @@ class TEDPolicy(Policy): # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, } @staticmethod @@ -447,7 +451,11 @@ def __init__( max_history_tracker_featurizer_used: bool, label_data: RasaModelData, ) -> None: - super().__init__(name="TED", random_seed=config[RANDOM_SEED]) + super().__init__( + name="TED", + random_seed=config[RANDOM_SEED], + tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + ) self.config = config self.max_history_tracker_featurizer_used = max_history_tracker_featurizer_used diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 778ac9f4b015..44c91746943e 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -56,6 +56,7 @@ SPARSE_INPUT_DROPOUT, MASKED_LM, ENTITY_RECOGNITION, + TENSORBOARD_LOG_DIR, INTENT_CLASSIFICATION, EVAL_NUM_EXAMPLES, EVAL_NUM_EPOCHS, @@ -207,6 +208,9 @@ def required_components(cls) -> List[Type[Component]]: # examples per entity are required. 
# Rule of thumb: you should have more than 100 examples per entity. BILOU_FLAG: True, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. + TENSORBOARD_LOG_DIR: None, } # init helpers @@ -937,7 +941,11 @@ def __init__( index_tag_id_mapping: Optional[Dict[int, Text]], config: Dict[Text, Any], ) -> None: - super().__init__(name="DIET", random_seed=config[RANDOM_SEED]) + super().__init__( + name="DIET", + random_seed=config[RANDOM_SEED], + tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + ) self.config = config diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 52f595ea6827..f61c5a462d6a 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -60,6 +60,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, ) from rasa.nlu.constants import ( RESPONSE, @@ -186,6 +187,9 @@ def required_components(cls) -> List[Type[Component]]: MASKED_LM: False, # Name of the intent for which this response selector is to be trained RETRIEVAL_INTENT: None, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. 
+ TENSORBOARD_LOG_DIR: None, } def __init__( diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 3e13221041d0..611ec2e7dcd7 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -65,3 +65,5 @@ POOLING = "pooling" MAX_POOLING = "max" MEAN_POOLING = "mean" + +TENSORBOARD_LOG_DIR = "tensorboard_log_directory" diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 48dafd731f21..42b08d8caf59 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -1,3 +1,5 @@ +import datetime + import tensorflow as tf import numpy as np import logging @@ -18,7 +20,12 @@ class RasaModel(tf.keras.models.Model): Cannot be used as tf.keras.Model """ - def __init__(self, random_seed: Optional[int] = None, **kwargs) -> None: + def __init__( + self, + random_seed: Optional[int] = None, + tensorboard_log_dir: Optional[Text] = None, + **kwargs, + ) -> None: """Initialize the RasaModel. 
Args: @@ -35,6 +42,25 @@ def __init__(self, random_seed: Optional[int] = None, **kwargs) -> None: self.random_seed = random_seed + if tensorboard_log_dir is None: + self.tensorboard_usage = False + else: + current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + + train_log_dir = ( + f"{tensorboard_log_dir}/logs/gradient_tape/{current_time}/train" + ) + test_log_dir = ( + f"{tensorboard_log_dir}/logs/gradient_tape/{current_time}/test" + ) + func_log_dir = f"{tensorboard_log_dir}/logs/func/{current_time}/func" + + self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) + self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) + self.func_summary_writer = tf.summary.create_file_writer(func_log_dir) + + self.tensorboard_usage = True + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: @@ -99,7 +125,7 @@ def fit( True, ) - postfix_dict = self._get_metric_results() + postfix_dict = self._get_metric_results(self.train_summary_writer, epoch) if evaluate_on_num_examples > 0: if self._should_evaluate(evaluate_every_num_epochs, epochs, epoch): @@ -109,7 +135,9 @@ def fit( epoch_batch_size, False, ) - val_results = self._get_metric_results(prefix="val_") + val_results = self._get_metric_results( + self.test_summary_writer, epoch, prefix="val_" + ) postfix_dict.update(val_results) @@ -202,8 +230,8 @@ def _batch_loop( for batch_in in dataset_function(batch_size): call_model_function(batch_in) - @staticmethod def _get_tf_call_model_function( + self, dataset_function: Callable, call_model_function: Callable, eager: bool, @@ -220,8 +248,16 @@ def _get_tf_call_model_function( tf_call_model_function = tf.function( call_model_function, input_signature=[init_dataset.element_spec] ) + + if self.tensorboard_usage: + tf.summary.trace_on(graph=True) + tf_call_model_function(next(iter(init_dataset))) + if self.tensorboard_usage: + with self.func_summary_writer.as_default(): + 
tf.summary.trace_export(name="rasa_model", step=0) + logger.debug(f"Finished building tensorflow {phase} graph.") return tf_call_model_function @@ -263,11 +299,21 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: ), ) - def _get_metric_results(self, prefix: Optional[Text] = None) -> Dict[Text, Text]: + def _get_metric_results( + self, writer, epoch: int, prefix: Optional[Text] = None + ) -> Dict[Text, Text]: """Get the metrics results""" prefix = prefix or "" + if self.tensorboard_usage: + with writer.as_default(): + for metric in self.metrics: + if metric.name in self.metrics_to_log: + tf.summary.scalar( + f"{prefix}{metric.name}", metric.result(), step=epoch + ) + return { f"{prefix}{metric.name}": f"{metric.result().numpy():.3f}" for metric in self.metrics From 5c2b6202d15b6fa43e588ec5eef94b7c654b234a Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 14:45:02 +0100 Subject: [PATCH 02/20] add changelog --- changelog/56.misc.rst | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 changelog/56.misc.rst diff --git a/changelog/56.misc.rst b/changelog/56.misc.rst new file mode 100644 index 000000000000..55f9d031c7f1 --- /dev/null +++ b/changelog/56.misc.rst @@ -0,0 +1,7 @@ +Add option ``tensorboard_log_directory`` to ``DIETClasifier``, ``ResponseSelector`` and +``TEDPolicy``. + +By default ``tensorboard_log_directory`` is ``None``. If a valid directory is provided, +metrics are written during training. After the model is trained you can take a look +at the training metrics in tensorboard. Execute ``tensorboard --logdir ``. +Make sure tensorboard is installed (``pip install tensorboard``). 
\ No newline at end of file From 10e40f7b67fcb91f4304ff2ff12bc9856fcbfa56 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 14:46:48 +0100 Subject: [PATCH 03/20] don't log tf.function --- rasa/utils/tensorflow/models.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 42b08d8caf59..6a87ca01037b 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -53,11 +53,9 @@ def __init__( test_log_dir = ( f"{tensorboard_log_dir}/logs/gradient_tape/{current_time}/test" ) - func_log_dir = f"{tensorboard_log_dir}/logs/func/{current_time}/func" self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) - self.func_summary_writer = tf.summary.create_file_writer(func_log_dir) self.tensorboard_usage = True @@ -248,16 +246,8 @@ def _get_tf_call_model_function( tf_call_model_function = tf.function( call_model_function, input_signature=[init_dataset.element_spec] ) - - if self.tensorboard_usage: - tf.summary.trace_on(graph=True) - tf_call_model_function(next(iter(init_dataset))) - if self.tensorboard_usage: - with self.func_summary_writer.as_default(): - tf.summary.trace_export(name="rasa_model", step=0) - logger.debug(f"Finished building tensorflow {phase} graph.") return tf_call_model_function From 6ea1a34583cb0ac081953545057b61dd919ccf63 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 14:49:45 +0100 Subject: [PATCH 04/20] update docs --- docs/core/policies.rst | 5 +++++ docs/nlu/components.rst | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 1f176b80fc00..e4f6dab7506d 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -425,6 +425,11 @@ It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer( # How many examples to use for hold out 
validation set # Large values may hurt performance, e.g. model accuracy. "evaluate_on_number_of_examples": 0 + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training via tensorboard via + # ``tensorboard --logdir `` + "tensorboard_log_directory": None .. note:: diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index b0b8ef8f4793..f4090f2eb434 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -1100,6 +1100,11 @@ ResponseSelector "use_masked_language_model": False # Name of the intent for which this response selector is to be trained "retrieval_intent: None + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training via tensorboard via + # ``tensorboard --logdir `` + "tensorboard_log_directory": None Entity Extractors @@ -1659,3 +1664,8 @@ DIETClassifier # examples per entity are required. # Rule of thumb: you should have more than 100 examples per entity. "BILOU_flag": True + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. 
+ # You can view the training metrics after training via tensorboard via + # ``tensorboard --logdir `` + "tensorboard_log_directory": None From e9707b5aa291b247fd1abef5b5ada661b9d0425c Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 15:02:49 +0100 Subject: [PATCH 05/20] clean up --- rasa/utils/tensorflow/models.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 6a87ca01037b..6529191fcbb8 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -5,6 +5,8 @@ import logging from collections import defaultdict from typing import List, Text, Dict, Tuple, Union, Optional, Callable + +from tensorflow_core.python.ops.summary_ops_v2 import ResourceSummaryWriter from tqdm import tqdm from rasa.utils.common import is_logging_disabled from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature @@ -42,9 +44,14 @@ def __init__( self.random_seed = random_seed - if tensorboard_log_dir is None: - self.tensorboard_usage = False - else: + self.train_summary_writer = None + self.test_summary_writer = None + self._set_up_tensorboard_writer(tensorboard_log_dir) + + def _set_up_tensorboard_writer( + self, tensorboard_log_dir: Optional[Text] = None + ) -> None: + if tensorboard_log_dir is not None: current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_log_dir = ( @@ -57,8 +64,6 @@ def __init__( self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) - self.tensorboard_usage = True - def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: @@ -123,7 +128,7 @@ def fit( True, ) - postfix_dict = self._get_metric_results(self.train_summary_writer, epoch) + postfix_dict = self._get_metric_results(epoch, self.train_summary_writer) if evaluate_on_num_examples > 0: if 
self._should_evaluate(evaluate_every_num_epochs, epochs, epoch): @@ -134,7 +139,7 @@ def fit( False, ) val_results = self._get_metric_results( - self.test_summary_writer, epoch, prefix="val_" + epoch, self.test_summary_writer, prefix="val_" ) postfix_dict.update(val_results) @@ -290,13 +295,16 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: ) def _get_metric_results( - self, writer, epoch: int, prefix: Optional[Text] = None + self, + epoch: int, + writer: Optional[ResourceSummaryWriter] = None, + prefix: Optional[Text] = None, ) -> Dict[Text, Text]: """Get the metrics results""" prefix = prefix or "" - if self.tensorboard_usage: + if writer is not None: with writer.as_default(): for metric in self.metrics: if metric.name in self.metrics_to_log: From b6f3ccd377c4fe9a5801c9c51ffd581f4db4e859 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 15:04:21 +0100 Subject: [PATCH 06/20] make method static --- rasa/utils/tensorflow/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 6529191fcbb8..f7a767b8e515 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -233,8 +233,8 @@ def _batch_loop( for batch_in in dataset_function(batch_size): call_model_function(batch_in) + @staticmethod def _get_tf_call_model_function( - self, dataset_function: Callable, call_model_function: Callable, eager: bool, From 3b34aded4abd05724806659fd89dcd88367ba8ac Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 15:05:18 +0100 Subject: [PATCH 07/20] update changelog file name. 
--- changelog/{56.misc.rst => 5422.feature.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changelog/{56.misc.rst => 5422.feature.rst} (100%) diff --git a/changelog/56.misc.rst b/changelog/5422.feature.rst similarity index 100% rename from changelog/56.misc.rst rename to changelog/5422.feature.rst From 49b5d0fae2ae8cd76f8c2acd28f79742a454a585 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 15:22:46 +0100 Subject: [PATCH 08/20] add tensorboard log dir option --- rasa/core/policies/embedding_policy.py | 4 ++++ rasa/nlu/classifiers/embedding_intent_classifier.py | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py index b3ad427db0ca..4892791515db 100644 --- a/rasa/core/policies/embedding_policy.py +++ b/rasa/core/policies/embedding_policy.py @@ -38,6 +38,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, ) from rasa.utils.tensorflow.models import RasaModel import rasa.utils.common as common_utils @@ -140,6 +141,9 @@ class EmbeddingPolicy(TEDPolicy): # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. 
+ TENSORBOARD_LOG_DIR: None, } def __init__( diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 43b485df5e3c..5f395593bbce 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -40,6 +40,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_DIR, ) import rasa.utils.common as common_utils from rasa.utils.tensorflow.models import RasaModel @@ -122,7 +123,7 @@ def required_components(cls) -> List[Type[Component]]: # Dropout rate for encoder DROP_RATE: 0.2, # Sparsity of the weights in dense layers - WEIGHT_SPARSITY: 0.8, + WEIGHT_SPARSITY: 0.0, # If 'True' apply dropout to sparse tensors SPARSE_INPUT_DROPOUT: False, # ## Evaluation parameters @@ -132,6 +133,9 @@ def required_components(cls) -> List[Type[Component]]: # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. EVAL_NUM_EXAMPLES: 0, + # If you want to use tensorboard to visualize training and validation metrics, + # set this option to a valid output directory. 
+ TENSORBOARD_LOG_DIR: None, } def __init__( From 6703790d86f3c8e163b2a4d324c6e4865de9da3f Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 17:11:47 +0100 Subject: [PATCH 09/20] add method to write model summary --- rasa/utils/tensorflow/models.py | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index f7a767b8e515..8f02866df788 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -46,6 +46,8 @@ def __init__( self.train_summary_writer = None self.test_summary_writer = None + self.model_summary_file = None + self._set_up_tensorboard_writer(tensorboard_log_dir) def _set_up_tensorboard_writer( @@ -64,6 +66,8 @@ def _set_up_tensorboard_writer( self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) + self.model_summary_file = f"{tensorboard_log_dir}/model_summary.txt" + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: @@ -146,6 +150,9 @@ def fit( progress_bar.set_postfix(postfix_dict) + if self.model_summary_file is not None: + self._write_model_summary() + self._training = None # training phase should be defined when building a graph if not disable: logger.info("Finished training.") @@ -386,6 +393,37 @@ def linearly_increasing_batch_size( else: return int(batch_size[0]) + def _write_model_summary(self): + total_number_of_variables = np.sum( + [np.prod(v.shape) for v in self.trainable_variables] + ) + layers = [ + f"{layer.name} ({layer.dtype.name}) " + f"[{'x'.join([str(s) for s in layer.shape])}]" + for layer in self.trainable_variables + ] + layers.reverse() + + file = open(self.model_summary_file, "w") + + file.write("-" * 100) + file.write("\n") + file.write("Variables: name (type) [shape]") + file.write("\n") + file.write("-" * 100) + file.write("\n") + for layer in layers: + 
file.write(layer) + file.write("\n") + file.write("-" * 100) + file.write("\n") + file.write(f"Total size of variables: {total_number_of_variables}") + file.write("\n") + file.write("-" * 100) + file.write("\n") + + file.close() + def compile(self, *args, **kwargs) -> None: raise Exception( "This method should neither be called nor implemented in our code." From 45f7622ef5c381425357f5cf4778992a8525ac92 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Fri, 13 Mar 2020 17:24:31 +0100 Subject: [PATCH 10/20] do not change default value --- rasa/nlu/classifiers/embedding_intent_classifier.py | 2 +- rasa/utils/tensorflow/models.py | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 5f395593bbce..1b3338063f98 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -123,7 +123,7 @@ def required_components(cls) -> List[Type[Component]]: # Dropout rate for encoder DROP_RATE: 0.2, # Sparsity of the weights in dense layers - WEIGHT_SPARSITY: 0.0, + WEIGHT_SPARSITY: 0.8, # If 'True' apply dropout to sparse tensors SPARSE_INPUT_DROPOUT: False, # ## Evaluation parameters diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 8f02866df788..ca2efc023ea7 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -406,21 +406,12 @@ def _write_model_summary(self): file = open(self.model_summary_file, "w") - file.write("-" * 100) - file.write("\n") - file.write("Variables: name (type) [shape]") - file.write("\n") - file.write("-" * 100) - file.write("\n") + file.write("Variables: name (type) [shape]\n\n") for layer in layers: file.write(layer) file.write("\n") - file.write("-" * 100) file.write("\n") file.write(f"Total size of variables: {total_number_of_variables}") - file.write("\n") - file.write("-" * 100) - 
file.write("\n") file.close() From 14010e1cd206d61db81cd9c1a57655c472872b0e Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 16 Mar 2020 09:40:23 +0100 Subject: [PATCH 11/20] review comments --- changelog/5422.feature.rst | 5 ++--- docs/core/policies.rst | 2 +- docs/nlu/components.rst | 9 +++++++-- rasa/utils/tensorflow/models.py | 28 ++++++++++++++-------------- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/changelog/5422.feature.rst b/changelog/5422.feature.rst index 55f9d031c7f1..31674ce63d91 100644 --- a/changelog/5422.feature.rst +++ b/changelog/5422.feature.rst @@ -1,7 +1,6 @@ -Add option ``tensorboard_log_directory`` to ``DIETClasifier``, ``ResponseSelector`` and -``TEDPolicy``. +Add option ``tensorboard_log_directory`` to ``EmbeddingIntentClassifier``, ``DIETClasifier``, ``ResponseSelector``, +``EmbeddingPolicy`` and ``TEDPolicy``. By default ``tensorboard_log_directory`` is ``None``. If a valid directory is provided, metrics are written during training. After the model is trained you can take a look at the training metrics in tensorboard. Execute ``tensorboard --logdir ``. -Make sure tensorboard is installed (``pip install tensorboard``). \ No newline at end of file diff --git a/docs/core/policies.rst b/docs/core/policies.rst index e4f6dab7506d..2cfbe4087149 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -427,7 +427,7 @@ It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer( "evaluate_on_number_of_examples": 0 # If you want to use tensorboard to visualize training metrics, # set this option to a valid output directory. 
- # You can view the training metrics after training via tensorboard via + # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index f4090f2eb434..2e39beb92059 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -919,6 +919,11 @@ EmbeddingIntentClassifier # How many examples to use for hold out validation set # Large values may hurt performance, e.g. model accuracy. "evaluate_on_number_of_examples": 0 + # If you want to use tensorboard to visualize training metrics, + # set this option to a valid output directory. + # You can view the training metrics after training in tensorboard via + # ``tensorboard --logdir `` + "tensorboard_log_directory": None .. _keyword_intent_classifier: @@ -1102,7 +1107,7 @@ ResponseSelector "retrieval_intent: None # If you want to use tensorboard to visualize training metrics, # set this option to a valid output directory. - # You can view the training metrics after training via tensorboard via + # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None @@ -1666,6 +1671,6 @@ DIETClassifier "BILOU_flag": True # If you want to use tensorboard to visualize training metrics, # set this option to a valid output directory. 
- # You can view the training metrics after training via tensorboard via + # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index ca2efc023ea7..eace28933cee 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -56,17 +56,15 @@ def _set_up_tensorboard_writer( if tensorboard_log_dir is not None: current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") - train_log_dir = ( - f"{tensorboard_log_dir}/logs/gradient_tape/{current_time}/train" - ) - test_log_dir = ( - f"{tensorboard_log_dir}/logs/gradient_tape/{current_time}/test" - ) + train_log_dir = f"{tensorboard_log_dir}/{current_time}/train" + test_log_dir = f"{tensorboard_log_dir}/{current_time}/test" self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) - self.model_summary_file = f"{tensorboard_log_dir}/model_summary.txt" + self.model_summary_file = ( + f"{tensorboard_log_dir}/{current_time}/model_summary.txt" + ) def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] @@ -308,16 +306,10 @@ def _get_metric_results( prefix: Optional[Text] = None, ) -> Dict[Text, Text]: """Get the metrics results""" - prefix = prefix or "" if writer is not None: - with writer.as_default(): - for metric in self.metrics: - if metric.name in self.metrics_to_log: - tf.summary.scalar( - f"{prefix}{metric.name}", metric.result(), step=epoch - ) + self._log_metrics_for_tensorboard(epoch, writer) return { f"{prefix}{metric.name}": f"{metric.result().numpy():.3f}" @@ -325,6 +317,14 @@ def _get_metric_results( if metric.name in self.metrics_to_log } + def _log_metrics_for_tensorboard( + self, step: int, writer: ResourceSummaryWriter + ) -> None: + with writer.as_default(): + for metric in self.metrics: + if metric.name in 
self.metrics_to_log: + tf.summary.scalar(f"{metric.name}", metric.result(), step=step) + @staticmethod def _should_evaluate( evaluate_every_num_epochs: int, epochs: int, current_epoch: int From b88ee67c8d748d561e8ba80e22a5234c095e57ae Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 16 Mar 2020 13:28:49 +0100 Subject: [PATCH 12/20] Add option to log on minibatches. --- changelog/5422.feature.rst | 10 ++++- docs/core/policies.rst | 4 ++ docs/nlu/components.rst | 12 +++++ rasa/core/policies/embedding_policy.py | 5 +++ rasa/core/policies/ted_policy.py | 6 +++ rasa/nlu/classifiers/diet_classifier.py | 6 +++ .../embedding_intent_classifier.py | 5 +++ rasa/nlu/selectors/response_selector.py | 5 +++ rasa/utils/tensorflow/constants.py | 1 + rasa/utils/tensorflow/models.py | 44 ++++++++++++++++--- 10 files changed, 89 insertions(+), 9 deletions(-) diff --git a/changelog/5422.feature.rst b/changelog/5422.feature.rst index 31674ce63d91..5f612027d351 100644 --- a/changelog/5422.feature.rst +++ b/changelog/5422.feature.rst @@ -1,6 +1,12 @@ -Add option ``tensorboard_log_directory`` to ``EmbeddingIntentClassifier``, ``DIETClasifier``, ``ResponseSelector``, -``EmbeddingPolicy`` and ``TEDPolicy``. +Add options ``tensorboard_log_directory`` and ``tensorboard_log_level`` to ``EmbeddingIntentClassifier``, +``DIETClasifier``, ``ResponseSelector``, ``EmbeddingPolicy`` and ``TEDPolicy``. By default ``tensorboard_log_directory`` is ``None``. If a valid directory is provided, metrics are written during training. After the model is trained you can take a look at the training metrics in tensorboard. Execute ``tensorboard --logdir ``. + +Metrics can either be written after every epoch (default) or for every training step. +You can specify when to write metrics using the variable ``tensorboard_log_level``. +Valid values are 'epoch' and 'minibatch'. + +We also write down a model summary, i.e. layers with inputs and types, to the given directory. 
diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 2cfbe4087149..5964c6987c1d 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -430,6 +430,10 @@ It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer( # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch", .. note:: diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index 2e39beb92059..2865dc225870 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -924,6 +924,10 @@ EmbeddingIntentClassifier # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch", .. _keyword_intent_classifier: @@ -1110,6 +1114,10 @@ ResponseSelector # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch", Entity Extractors @@ -1674,3 +1682,7 @@ DIETClassifier # You can view the training metrics after training in tensorboard via # ``tensorboard --logdir `` "tensorboard_log_directory": None + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. 
+ # Valid values: 'epoch' and 'minibatch' + "tensorboard_log_level": "epoch", diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py index 4892791515db..25eb5f036ad0 100644 --- a/rasa/core/policies/embedding_policy.py +++ b/rasa/core/policies/embedding_policy.py @@ -39,6 +39,7 @@ AUTO, BALANCED, TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) from rasa.utils.tensorflow.models import RasaModel import rasa.utils.common as common_utils @@ -144,6 +145,10 @@ class EmbeddingPolicy(TEDPolicy): # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 20d5893bfe7c..a1790d52d2e7 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -59,6 +59,7 @@ AUTO, BALANCED, TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) @@ -173,6 +174,10 @@ class TEDPolicy(Policy): # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. 
+ # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } @staticmethod @@ -455,6 +460,7 @@ def __init__( name="TED", random_seed=config[RANDOM_SEED], tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + tensorboard_log_level=config[TENSORBOARD_LOG_LEVEL], ) self.config = config diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 44c91746943e..73025797d058 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -78,6 +78,7 @@ SOFTMAX, AUTO, BALANCED, + TENSORBOARD_LOG_LEVEL, ) @@ -211,6 +212,10 @@ def required_components(cls) -> List[Type[Component]]: # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } # init helpers @@ -945,6 +950,7 @@ def __init__( name="DIET", random_seed=config[RANDOM_SEED], tensorboard_log_dir=config[TENSORBOARD_LOG_DIR], + tensorboard_log_level=config[TENSORBOARD_LOG_LEVEL], ) self.config = config diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 1b3338063f98..7e91e8e6d594 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -41,6 +41,7 @@ AUTO, BALANCED, TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) import rasa.utils.common as common_utils from rasa.utils.tensorflow.models import RasaModel @@ -136,6 +137,10 @@ def required_components(cls) -> List[Type[Component]]: # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. 
+ # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index f61c5a462d6a..a7be78a303f3 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -61,6 +61,7 @@ AUTO, BALANCED, TENSORBOARD_LOG_DIR, + TENSORBOARD_LOG_LEVEL, ) from rasa.nlu.constants import ( RESPONSE, @@ -190,6 +191,10 @@ def required_components(cls) -> List[Type[Component]]: # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. TENSORBOARD_LOG_DIR: None, + # Define when training metrics for tensorboard should be logged. + # Either after every epoch or for every training step. + # Valid values: 'epoch' and 'minibatch' + TENSORBOARD_LOG_LEVEL: "epoch", } def __init__( diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 611ec2e7dcd7..8563ff8d7c19 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -67,3 +67,4 @@ MEAN_POOLING = "mean" TENSORBOARD_LOG_DIR = "tensorboard_log_directory" +TENSORBOARD_LOG_LEVEL = "tensorboard_log_level" diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index eace28933cee..20c0895e5f57 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -10,11 +10,14 @@ from tqdm import tqdm from rasa.utils.common import is_logging_disabled from rasa.utils.tensorflow.model_data import RasaModelData, FeatureSignature -from rasa.utils.tensorflow.constants import SEQUENCE +from rasa.utils.tensorflow.constants import SEQUENCE, TENSORBOARD_LOG_LEVEL logger = logging.getLogger(__name__) +TENSORBOARD_LOG_LEVELS = ["epoch", "minibatch"] + + # noinspection PyMethodOverriding class RasaModel(tf.keras.models.Model): """Completely override all public methods of 
keras Model. @@ -26,6 +29,7 @@ def __init__( self, random_seed: Optional[int] = None, tensorboard_log_dir: Optional[Text] = None, + tensorboard_log_level: Optional[Text] = "epoch", **kwargs, ) -> None: """Initialize the RasaModel. @@ -48,10 +52,10 @@ def __init__( self.test_summary_writer = None self.model_summary_file = None - self._set_up_tensorboard_writer(tensorboard_log_dir) + self._set_up_tensorboard_writer(tensorboard_log_level, tensorboard_log_dir) def _set_up_tensorboard_writer( - self, tensorboard_log_dir: Optional[Text] = None + self, tensorboard_log_level: Text, tensorboard_log_dir: Optional[Text] = None ) -> None: if tensorboard_log_dir is not None: current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") @@ -66,6 +70,14 @@ def _set_up_tensorboard_writer( f"{tensorboard_log_dir}/{current_time}/model_summary.txt" ) + if tensorboard_log_level not in TENSORBOARD_LOG_LEVELS: + raise ValueError( + f"Provided '{TENSORBOARD_LOG_LEVEL}' ('{tensorboard_log_level}') " + f"is invalid! 
Valid values are: {TENSORBOARD_LOG_LEVELS}" + ) + + self.tensorboard_log_on_epochs = tensorboard_log_level == "epoch" + def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: @@ -118,27 +130,34 @@ def fit( val_results = {} # validation is not performed every epoch progress_bar = tqdm(range(epochs), desc="Epochs", disable=disable) + train_steps = 0 + evaluation_steps = 0 + for epoch in progress_bar: epoch_batch_size = self.linearly_increasing_batch_size( epoch, batch_size, epochs ) - self._batch_loop( + train_steps = self._batch_loop( train_dataset_function, tf_train_on_batch_function, epoch_batch_size, True, + train_steps, + self.train_summary_writer, ) postfix_dict = self._get_metric_results(epoch, self.train_summary_writer) if evaluate_on_num_examples > 0: if self._should_evaluate(evaluate_every_num_epochs, epochs, epoch): - self._batch_loop( + evaluation_steps = self._batch_loop( evaluation_dataset_function, tf_evaluation_on_batch_function, epoch_batch_size, False, + evaluation_steps, + self.test_summary_writer, ) val_results = self._get_metric_results( epoch, self.test_summary_writer, prefix="val_" @@ -230,14 +249,25 @@ def _batch_loop( call_model_function: Callable, batch_size: int, training: bool, - ) -> None: + offset: int, + writer: Optional[ResourceSummaryWriter] = None, + ) -> int: """Run on batches""" self.reset_metrics() + + step = offset + self._training = training # needed for eager mode for batch_in in dataset_function(batch_size): call_model_function(batch_in) + if writer is not None and not self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard(step, writer) + step += 1 + + return step + @staticmethod def _get_tf_call_model_function( dataset_function: Callable, @@ -308,7 +338,7 @@ def _get_metric_results( """Get the metrics results""" prefix = prefix or "" - if writer is not None: + if writer is not None and self.tensorboard_log_on_epochs: self._log_metrics_for_tensorboard(epoch, writer) return { 
From fad988afbb878dd94c2ea113ff406fdfbaddcd10 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 16 Mar 2020 13:33:00 +0100 Subject: [PATCH 13/20] clean up --- docs/nlu/components.rst | 2 +- rasa/utils/tensorflow/models.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index 2865dc225870..cf9d34655fcf 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -1108,7 +1108,7 @@ ResponseSelector # should predict those tokens. "use_masked_language_model": False # Name of the intent for which this response selector is to be trained - "retrieval_intent: None + "retrieval_intent": None # If you want to use tensorboard to visualize training metrics, # set this option to a valid output directory. # You can view the training metrics after training in tensorboard via diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 20c0895e5f57..463165de9dac 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -51,6 +51,7 @@ def __init__( self.train_summary_writer = None self.test_summary_writer = None self.model_summary_file = None + self.tensorboard_log_on_epochs = True self._set_up_tensorboard_writer(tensorboard_log_level, tensorboard_log_dir) @@ -58,6 +59,14 @@ def _set_up_tensorboard_writer( self, tensorboard_log_level: Text, tensorboard_log_dir: Optional[Text] = None ) -> None: if tensorboard_log_dir is not None: + if tensorboard_log_level not in TENSORBOARD_LOG_LEVELS: + raise ValueError( + f"Provided '{TENSORBOARD_LOG_LEVEL}' ('{tensorboard_log_level}') " + f"is invalid! 
Valid values are: {TENSORBOARD_LOG_LEVELS}" + ) + + self.tensorboard_log_on_epochs = tensorboard_log_level == "epoch" + current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") train_log_dir = f"{tensorboard_log_dir}/{current_time}/train" @@ -70,14 +79,6 @@ def _set_up_tensorboard_writer( f"{tensorboard_log_dir}/{current_time}/model_summary.txt" ) - if tensorboard_log_level not in TENSORBOARD_LOG_LEVELS: - raise ValueError( - f"Provided '{TENSORBOARD_LOG_LEVEL}' ('{tensorboard_log_level}') " - f"is invalid! Valid values are: {TENSORBOARD_LOG_LEVELS}" - ) - - self.tensorboard_log_on_epochs = tensorboard_log_level == "epoch" - def batch_loss( self, batch_in: Union[Tuple[tf.Tensor], Tuple[np.ndarray]] ) -> tf.Tensor: From 454c6f2110d699964c4fab50ec2e452bb99ec976 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Mon, 16 Mar 2020 14:46:33 +0100 Subject: [PATCH 14/20] fix docs --- docs/core/policies.rst | 2 +- docs/nlu/components.rst | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 5964c6987c1d..2926359dde8b 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -433,7 +433,7 @@ It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer( # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. # Valid values: 'epoch' and 'minibatch' - "tensorboard_log_level": "epoch", + "tensorboard_log_level": "epoch" .. note:: diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index cf9d34655fcf..4fd62ea5e6bd 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -927,7 +927,7 @@ EmbeddingIntentClassifier # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. # Valid values: 'epoch' and 'minibatch' - "tensorboard_log_level": "epoch", + "tensorboard_log_level": "epoch" .. 
_keyword_intent_classifier: @@ -1117,7 +1117,7 @@ ResponseSelector # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. # Valid values: 'epoch' and 'minibatch' - "tensorboard_log_level": "epoch", + "tensorboard_log_level": "epoch" Entity Extractors @@ -1685,4 +1685,4 @@ DIETClassifier # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. # Valid values: 'epoch' and 'minibatch' - "tensorboard_log_level": "epoch", + "tensorboard_log_level": "epoch" From 8e893f615d156d587a650338cdab005bef44bdb9 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 17 Mar 2020 13:37:08 +0100 Subject: [PATCH 15/20] use correct step value for test curve --- rasa/nlu/classifiers/diet_classifier.py | 4 ++-- rasa/utils/tensorflow/models.py | 32 +++++++++++++++---------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 73025797d058..7af6b85c0c93 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -211,11 +211,11 @@ def required_components(cls) -> List[Type[Component]]: BILOU_FLAG: True, # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. - TENSORBOARD_LOG_DIR: None, + TENSORBOARD_LOG_DIR: "tensorboard", # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. 
# Valid values: 'epoch' and 'minibatch' - TENSORBOARD_LOG_LEVEL: "epoch", + TENSORBOARD_LOG_LEVEL: "minibatch", } # init helpers diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 463165de9dac..57958ac689f4 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -131,37 +131,42 @@ def fit( val_results = {} # validation is not performed every epoch progress_bar = tqdm(range(epochs), desc="Epochs", disable=disable) - train_steps = 0 - evaluation_steps = 0 + training_steps = 0 for epoch in progress_bar: epoch_batch_size = self.linearly_increasing_batch_size( epoch, batch_size, epochs ) - train_steps = self._batch_loop( + training_steps = self._batch_loop( train_dataset_function, tf_train_on_batch_function, epoch_batch_size, True, - train_steps, + training_steps, self.train_summary_writer, ) - postfix_dict = self._get_metric_results(epoch, self.train_summary_writer) + postfix_dict = self._get_metric_results( + tensorboard_logging=self.tensorboard_log_on_epochs, + step=epoch, + writer=self.train_summary_writer, + ) if evaluate_on_num_examples > 0: if self._should_evaluate(evaluate_every_num_epochs, epochs, epoch): - evaluation_steps = self._batch_loop( + self._batch_loop( evaluation_dataset_function, tf_evaluation_on_batch_function, epoch_batch_size, False, - evaluation_steps, - self.test_summary_writer, ) + step = epoch if self.tensorboard_log_on_epochs else training_steps val_results = self._get_metric_results( - epoch, self.test_summary_writer, prefix="val_" + step=step, + writer=self.test_summary_writer, + prefix="val_", + tensorboard_logging=True, ) postfix_dict.update(val_results) @@ -250,7 +255,7 @@ def _batch_loop( call_model_function: Callable, batch_size: int, training: bool, - offset: int, + offset: Optional[int] = 0, writer: Optional[ResourceSummaryWriter] = None, ) -> int: """Run on batches""" @@ -332,15 +337,16 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: def 
_get_metric_results( self, - epoch: int, + step: int, + tensorboard_logging: bool = False, writer: Optional[ResourceSummaryWriter] = None, prefix: Optional[Text] = None, ) -> Dict[Text, Text]: """Get the metrics results""" prefix = prefix or "" - if writer is not None and self.tensorboard_log_on_epochs: - self._log_metrics_for_tensorboard(epoch, writer) + if writer is not None and tensorboard_logging: + self._log_metrics_for_tensorboard(step, writer) return { f"{prefix}{metric.name}": f"{metric.result().numpy():.3f}" From 67f1bb05b155bbd3c17e05fac5d122a292476ce8 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 17 Mar 2020 13:40:07 +0100 Subject: [PATCH 16/20] update default values --- rasa/nlu/classifiers/diet_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 7af6b85c0c93..73025797d058 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -211,11 +211,11 @@ def required_components(cls) -> List[Type[Component]]: BILOU_FLAG: True, # If you want to use tensorboard to visualize training and validation metrics, # set this option to a valid output directory. - TENSORBOARD_LOG_DIR: "tensorboard", + TENSORBOARD_LOG_DIR: None, # Define when training metrics for tensorboard should be logged. # Either after every epoch or for every training step. 
 # Valid values: 'epoch' and 'minibatch' - TENSORBOARD_LOG_LEVEL: "minibatch", + TENSORBOARD_LOG_LEVEL: "epoch", } # init helpers From 76e64ba6ce5d7bde4edd230678841b1e0bce29f0 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 17 Mar 2020 14:51:56 +0100 Subject: [PATCH 17/20] logging on minibatches for evaluation set --- rasa/utils/tensorflow/models.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 57958ac689f4..4924b3022f2c 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -148,9 +148,7 @@ def fit( ) postfix_dict = self._get_metric_results( - tensorboard_logging=self.tensorboard_log_on_epochs, - step=epoch, - writer=self.train_summary_writer, + step=epoch, writer=self.train_summary_writer ) if evaluate_on_num_examples > 0: @@ -160,13 +158,11 @@ def fit( tf_evaluation_on_batch_function, epoch_batch_size, False, + training_steps, + self.test_summary_writer, ) - step = epoch if self.tensorboard_log_on_epochs else training_steps val_results = self._get_metric_results( - step=step, - writer=self.test_summary_writer, - prefix="val_", - tensorboard_logging=True, + step=epoch, writer=self.test_summary_writer, prefix="val_" ) postfix_dict.update(val_results) @@ -338,14 +334,13 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: def _get_metric_results( self, step: int, - tensorboard_logging: bool = False, writer: Optional[ResourceSummaryWriter] = None, prefix: Optional[Text] = None, ) -> Dict[Text, Text]: """Get the metrics results""" prefix = prefix or "" - if writer is not None and tensorboard_logging: + if writer is not None and self.tensorboard_log_on_epochs: self._log_metrics_for_tensorboard(step, writer) return { From 1580d3b3a3581902c80812ed797aebc50910c181 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 17 Mar 2020 17:50:18 +0100 Subject: [PATCH 18/20] review comments ---
rasa/utils/tensorflow/models.py | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 4924b3022f2c..36fc48dfec30 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -68,15 +68,16 @@ def _set_up_tensorboard_writer( self.tensorboard_log_on_epochs = tensorboard_log_level == "epoch" current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + class_name = self.__class__.__name__ - train_log_dir = f"{tensorboard_log_dir}/{current_time}/train" - test_log_dir = f"{tensorboard_log_dir}/{current_time}/test" + train_log_dir = f"{tensorboard_log_dir}/{class_name}/{current_time}/train" + test_log_dir = f"{tensorboard_log_dir}/{class_name}/{current_time}/test" self.train_summary_writer = tf.summary.create_file_writer(train_log_dir) self.test_summary_writer = tf.summary.create_file_writer(test_log_dir) self.model_summary_file = ( - f"{tensorboard_log_dir}/{current_time}/model_summary.txt" + f"{tensorboard_log_dir}/{class_name}/{current_time}/model_summary.txt" ) def batch_loss( @@ -147,9 +148,10 @@ def fit( self.train_summary_writer, ) - postfix_dict = self._get_metric_results( - step=epoch, writer=self.train_summary_writer - ) + if self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard(epoch, self.train_summary_writer) + + postfix_dict = self._get_metric_results() if evaluate_on_num_examples > 0: if self._should_evaluate(evaluate_every_num_epochs, epochs, epoch): @@ -161,9 +163,13 @@ def fit( training_steps, self.test_summary_writer, ) - val_results = self._get_metric_results( - step=epoch, writer=self.test_summary_writer, prefix="val_" - ) + + if self.tensorboard_log_on_epochs: + self._log_metrics_for_tensorboard( + epoch, self.test_summary_writer + ) + + val_results = self._get_metric_results(prefix="val_") postfix_dict.update(val_results) @@ -264,8 +270,9 @@ def _batch_loop( for batch_in in 
dataset_function(batch_size): call_model_function(batch_in) - if writer is not None and not self.tensorboard_log_on_epochs: + if not self.tensorboard_log_on_epochs: self._log_metrics_for_tensorboard(step, writer) + step += 1 return step @@ -331,18 +338,10 @@ def evaluation_dataset_function(_batch_size: int) -> tf.data.Dataset: ), ) - def _get_metric_results( - self, - step: int, - writer: Optional[ResourceSummaryWriter] = None, - prefix: Optional[Text] = None, - ) -> Dict[Text, Text]: + def _get_metric_results(self, prefix: Optional[Text] = None) -> Dict[Text, Text]: """Get the metrics results""" prefix = prefix or "" - if writer is not None and self.tensorboard_log_on_epochs: - self._log_metrics_for_tensorboard(step, writer) - return { f"{prefix}{metric.name}": f"{metric.result().numpy():.3f}" for metric in self.metrics @@ -350,12 +349,13 @@ def _get_metric_results( } def _log_metrics_for_tensorboard( - self, step: int, writer: ResourceSummaryWriter + self, step: int, writer: Optional[ResourceSummaryWriter] = None ) -> None: - with writer.as_default(): - for metric in self.metrics: - if metric.name in self.metrics_to_log: - tf.summary.scalar(f"{metric.name}", metric.result(), step=step) + if self.train_summary_writer is not None: + with writer.as_default(): + for metric in self.metrics: + if metric.name in self.metrics_to_log: + tf.summary.scalar(metric.name, metric.result(), step=step) @staticmethod def _should_evaluate( From 5d301213511fbb3ef9dab93b23d2fb9b7fb37431 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Tue, 17 Mar 2020 18:07:03 +0100 Subject: [PATCH 19/20] add test --- tests/nlu/classifiers/test_diet_classifier.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index 4c38733607c2..3e5d3de3d903 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -13,6 +13,8 @@ 
RANKING_LENGTH, EPOCHS, MASKED_LM, + TENSORBOARD_LOG_LEVEL, + TENSORBOARD_LOG_DIR, ) from rasa.nlu.classifiers.diet_classifier import DIETClassifier from rasa.nlu.model import Interpreter @@ -300,3 +302,37 @@ async def test_set_random_seed(component_builder, tmpdir): result_b = loaded_b.parse("hello")["intent"]["confidence"] assert result_a == result_b + + +async def test_train_tensorboard_logging(component_builder, tmpdir): + import os + + tensorboard_log_dir = os.path.join(tmpdir.strpath, "tensorboard") + + assert not os.path.exists(tensorboard_log_dir) + + _config = RasaNLUModelConfig( + { + "pipeline": [ + {"name": "WhitespaceTokenizer"}, + {"name": "CountVectorsFeaturizer"}, + { + "name": "DIETClassifier", + EPOCHS: 1, + TENSORBOARD_LOG_LEVEL: "epoch", + TENSORBOARD_LOG_DIR: tensorboard_log_dir, + }, + ], + "language": "en", + } + ) + + await train( + _config, + path=tmpdir.strpath, + data="data/examples/rasa/demo-rasa-multi-intent.md", + component_builder=component_builder, + ) + + assert os.path.exists(tensorboard_log_dir) + assert os.path.exists(f"{tensorboard_log_dir}/DIET") From d750f5d775cd818a44cf84d5f81e64f96e15fac1 Mon Sep 17 00:00:00 2001 From: Tanja Bergmann Date: Wed, 18 Mar 2020 09:10:41 +0100 Subject: [PATCH 20/20] fix type, style issues --- rasa/utils/tensorflow/models.py | 27 +++++++++---------- tests/nlu/classifiers/test_diet_classifier.py | 20 +++++++++----- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index 36fc48dfec30..d4fa4f63e880 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -257,7 +257,7 @@ def _batch_loop( call_model_function: Callable, batch_size: int, training: bool, - offset: Optional[int] = 0, + offset: int, writer: Optional[ResourceSummaryWriter] = None, ) -> int: """Run on batches""" @@ -351,7 +351,7 @@ def _get_metric_results(self, prefix: Optional[Text] = None) -> Dict[Text, Text] def 
_log_metrics_for_tensorboard( self, step: int, writer: Optional[ResourceSummaryWriter] = None ) -> None: - if self.train_summary_writer is not None: + if writer is not None: with writer.as_default(): for metric in self.metrics: if metric.name in self.metrics_to_log: @@ -375,9 +375,9 @@ def batch_to_model_data_format( """Convert input batch tensors into batch data format. Batch contains any number of batch data. The order is equal to the - key-value pairs in session data. As sparse data were converted into indices, data, - shape before, this methods converts them into sparse tensors. Dense data is - kept. + key-value pairs in session data. As sparse data were converted into indices, + data, shape before, this methods converts them into sparse tensors. Dense data + is kept. """ batch_data = defaultdict(list) @@ -431,21 +431,18 @@ def _write_model_summary(self): ) layers = [ f"{layer.name} ({layer.dtype.name}) " - f"[{'x'.join([str(s) for s in layer.shape])}]" + f"[{'x'.join(str(s) for s in layer.shape)}]" for layer in self.trainable_variables ] layers.reverse() - file = open(self.model_summary_file, "w") - - file.write("Variables: name (type) [shape]\n\n") - for layer in layers: - file.write(layer) + with open(self.model_summary_file, "w") as file: + file.write("Variables: name (type) [shape]\n\n") + for layer in layers: + file.write(layer) + file.write("\n") file.write("\n") - file.write("\n") - file.write(f"Total size of variables: {total_number_of_variables}") - - file.close() + file.write(f"Total size of variables: {total_number_of_variables}") def compile(self, *args, **kwargs) -> None: raise Exception( diff --git a/tests/nlu/classifiers/test_diet_classifier.py b/tests/nlu/classifiers/test_diet_classifier.py index 3e5d3de3d903..587f8de6a054 100644 --- a/tests/nlu/classifiers/test_diet_classifier.py +++ b/tests/nlu/classifiers/test_diet_classifier.py @@ -15,6 +15,8 @@ MASKED_LM, TENSORBOARD_LOG_LEVEL, TENSORBOARD_LOG_DIR, + EVAL_NUM_EPOCHS, + EVAL_NUM_EXAMPLES, ) 
from rasa.nlu.classifiers.diet_classifier import DIETClassifier from rasa.nlu.model import Interpreter @@ -305,11 +307,11 @@ async def test_set_random_seed(component_builder, tmpdir): async def test_train_tensorboard_logging(component_builder, tmpdir): - import os + from pathlib import Path - tensorboard_log_dir = os.path.join(tmpdir.strpath, "tensorboard") + tensorboard_log_dir = Path(tmpdir.strpath) / "tensorboard" - assert not os.path.exists(tensorboard_log_dir) + assert not tensorboard_log_dir.exists() _config = RasaNLUModelConfig( { @@ -318,9 +320,11 @@ async def test_train_tensorboard_logging(component_builder, tmpdir): {"name": "CountVectorsFeaturizer"}, { "name": "DIETClassifier", - EPOCHS: 1, + EPOCHS: 3, TENSORBOARD_LOG_LEVEL: "epoch", - TENSORBOARD_LOG_DIR: tensorboard_log_dir, + TENSORBOARD_LOG_DIR: str(tensorboard_log_dir), + EVAL_NUM_EXAMPLES: 15, + EVAL_NUM_EPOCHS: 1, }, ], "language": "en", @@ -334,5 +338,7 @@ async def test_train_tensorboard_logging(component_builder, tmpdir): component_builder=component_builder, ) - assert os.path.exists(tensorboard_log_dir) - assert os.path.exists(f"{tensorboard_log_dir}/DIET") + assert tensorboard_log_dir.exists() + + all_files = list(tensorboard_log_dir.rglob("*.*")) + assert len(all_files) == 3