google · tvmarino · Dec 4, 2024 · Nov 20, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/compiler_opt/rl/generate_bc_trajectories.py b/compiler_opt/rl/generate_bc_trajectories.py
@@ -16,12 +16,10 @@
 
 import concurrent.futures
 import contextlib
-import functools
 import gin
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Generator, Union
 import json
 
-from absl import app
 from absl import flags
 from absl import logging
 import bisect
@@ -40,7 +38,6 @@
 from tf_agents.trajectories import policy_step
 from tf_agents.trajectories import time_step
 from tf_agents.specs import tensor_spec
-from tf_agents.system import system_multiprocessing as multiprocessing
 
 from compiler_opt.rl import corpus
 from compiler_opt.rl import env
@@ -341,6 +338,8 @@ def __init__(
           'reward_key not specified in ModuleExplorer initialization.')
     self._reward_key = reward_key
     kwargs.pop('reward_key', None)
+    gin_config_str = kwargs.pop('gin_config_str', None)
+    gin.parse_config(gin_config_str)
     self._working_dir = None
 
     self._env = env.MLGOEnvironmentBase(
@@ -742,7 +741,7 @@ def __init__(
       clang_path: str = gin.REQUIRED,
       mlgo_task_type: Type[env.MLGOTask] = gin.REQUIRED,
       policy_paths: List[Optional[str]] = [],
-      exploration_frac: float = gin.REQUIRED,
+      exploration_frac: float = 1.0,
       max_exploration_steps: int = 7,
       callable_policies: List[Optional[Callable[[Any], np.ndarray]]] = [],
       exploration_policy_paths: Optional[str] = None,
@@ -869,7 +868,7 @@ def select_best_exploration(
 def gen_trajectories(
     #  pylint: disable=dangerous-default-value
     data_path: str = gin.REQUIRED,
-    delete_flags: Tuple[str, ...] = gin.REQUIRED,
+    delete_flags: Tuple[str, ...] = (),
     output_file_name: str = gin.REQUIRED,
     output_path: str = gin.REQUIRED,
     mlgo_task_type: Type[env.MLGOTask] = gin.REQUIRED,
@@ -933,6 +932,7 @@ def gen_trajectories(
       obs_action_specs=obs_action_spec,
       mlgo_task_type=mlgo_task_type,
       callable_policies=callable_policies,
+      gin_config_str=gin.config_str(),
   ) as lwm:
 
     _, result_futures = buffered_scheduler.schedule_on_worker_pool(
@@ -1013,15 +1013,3 @@ def gen_trajectories(
       modules_processed,
       time_compiler_calls,
   )
-
-
-def main(_):
-  gin.parse_config_files_and_bindings(
-      FLAGS.gin_files, bindings=FLAGS.gin_bindings, skip_unknown=True)
-  logging.info(gin.config_str())
-
-  gen_trajectories()
-
-
-if __name__ == '__main__':
-  multiprocessing.handle_main(functools.partial(app.run, main))
diff --git a/compiler_opt/rl/generate_bc_trajectories_main.py b/compiler_opt/rl/generate_bc_trajectories_main.py
@@ -0,0 +1,43 @@
+# coding=utf-8
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module for running compilation and collecting data for behavior cloning."""
+
+import functools
+from absl import app
+from absl import flags
+from absl import logging
+import gin
+
+from compiler_opt.rl import generate_bc_trajectories
+from compiler_opt.tools import generate_test_model  # pylint:disable=unused-import
+
+from tf_agents.system import system_multiprocessing as multiprocessing
+
+flags.FLAGS['gin_files'].allow_override = True  # permit re-definition
+flags.FLAGS['gin_bindings'].allow_override = True  # permit re-definition
+
+FLAGS = flags.FLAGS  # module-level alias used by main()
+
+
+def main(_):
+  """Parses gin config from flags, logs it, and runs trajectory generation."""
+  gin.parse_config_files_and_bindings(
+      FLAGS.gin_files, bindings=FLAGS.gin_bindings, skip_unknown=True)
+  logging.info(gin.config_str())
+  generate_bc_trajectories.gen_trajectories()
+
+
+if __name__ == '__main__':
+  multiprocessing.handle_main(functools.partial(app.run, main))