-Original file line number
+Diff line change
@@ Expand Up / @@ -76,6 +76,7 @@ worker_config: @@
           - 2048
           - 256
         print_iter_log: true
+        trust_remote_code: true
         kv_cache_config:
           enable_block_reuse: false
           free_gpu_memory_fraction: 0.8
@@ Expand All / @@ -102,6 +103,7 @@ worker_config: @@
         enable_attention_dp: true
         pipeline_parallel_size: 1
         print_iter_log: true
+        trust_remote_code: true
         cuda_graph_config: null
         disable_overlap_scheduler: true
         kv_cache_config:
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -52,6 +52,7 @@ for concurrency in ${concurrency_list}; do @@
             --dataset-path ${dataset_file} \
             --num-prompts ${num_prompts} \
             --max-concurrency ${concurrency} \
+            --trust-remote-code \
             --ignore-eos \
             --no-test-input \
             --save-result \
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -75,6 +75,7 @@ for concurrency in ${concurrency_list}; do @@
             --dataset-name random \
             --num-prompts "${num_prompts}" \
             --max-concurrency "${concurrency}" \
+            --trust-remote-code \
             --ignore-eos \
             --random-input-len "${input_seq_len}" \
             --random-output-len "${output_seq_len}" \
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -105,8 +105,13 @@ def submit_job(config): @@
         log_base = os.path.join(env_config['work_dir'], f"{isl}-{osl}")
         # Get eplb num_slots for gen worker
-        eplb_num_slots = (config['worker_config']['gen'].get('moe_config', {}).get(
-            'load_balancer', {}).get('num_slots', 0))
+        load_balancer_config = config['worker_config']['gen'].get(
+            'moe_config', {}).get('load_balancer', {})
+        if isinstance(load_balancer_config, str):
+            with open(load_balancer_config, 'r') as f:
+                load_balancer_config = yaml.safe_load(f)
+        eplb_num_slots = load_balancer_config.get('num_slots', 0)
         # Determine directory suffix based on attention_dp
         if gen_enable_attention_dp:
             dir_suffix = f"ctx{ctx_num}_gen{gen_num}_dep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
@@ Expand Down @@

tensorrt_llm/executor/result.py

-Original file line number
+Diff line change
@@ Expand Up @@
                                 beam_output.finish_reason = 'stop'
                                 beam_output.stop_reason = stop_reason
-                                self.abort()
                                 self._done = True
                                 break
@@ Expand Down @@

[https://nvbugs/5687820][fix] Remove self.abort() in DetokenizedGenerationResult #9449

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged

syuoni merged 1 commit into NVIDIA:main from syuoni:fix-result

Nov 27, 2025

+11 −3

-Original file line number
+Diff line change
@@ Expand Up / @@ -76,6 +76,7 @@ worker_config: @@
           - 2048
           - 256
         print_iter_log: true
+        trust_remote_code: true
         kv_cache_config:
           enable_block_reuse: false
           free_gpu_memory_fraction: 0.8
@@ Expand All / @@ -102,6 +103,7 @@ worker_config: @@
         enable_attention_dp: true
         pipeline_parallel_size: 1
         print_iter_log: true
+        trust_remote_code: true
         cuda_graph_config: null
         disable_overlap_scheduler: true
         kv_cache_config:
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -52,6 +52,7 @@ for concurrency in ${concurrency_list}; do @@
             --dataset-path ${dataset_file} \
             --num-prompts ${num_prompts} \
             --max-concurrency ${concurrency} \
+            --trust-remote-code \
             --ignore-eos \
             --no-test-input \
             --save-result \
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -75,6 +75,7 @@ for concurrency in ${concurrency_list}; do @@
             --dataset-name random \
             --num-prompts "${num_prompts}" \
             --max-concurrency "${concurrency}" \
+            --trust-remote-code \
             --ignore-eos \
             --random-input-len "${input_seq_len}" \
             --random-output-len "${output_seq_len}" \
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -105,8 +105,13 @@ def submit_job(config): @@
         log_base = os.path.join(env_config['work_dir'], f"{isl}-{osl}")
         # Get eplb num_slots for gen worker
-        eplb_num_slots = (config['worker_config']['gen'].get('moe_config', {}).get(
-            'load_balancer', {}).get('num_slots', 0))
+        load_balancer_config = config['worker_config']['gen'].get(
+            'moe_config', {}).get('load_balancer', {})
+        if isinstance(load_balancer_config, str):
+            with open(load_balancer_config, 'r') as f:
+                load_balancer_config = yaml.safe_load(f)
+        eplb_num_slots = load_balancer_config.get('num_slots', 0)
         # Determine directory suffix based on attention_dp
         if gen_enable_attention_dp:
             dir_suffix = f"ctx{ctx_num}_gen{gen_num}_dep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up @@
                                 beam_output.finish_reason = 'stop'
                                 beam_output.stop_reason = stop_reason
-                                self.abort()
                                 self._done = True
                                 break
@@ Expand Down @@

Provide feedback