From 1ef1ea73f0791f01f332313a245fccbc0ae109c8 Mon Sep 17 00:00:00 2001
From: Brian Yu <bxyu@nvidia.com>
Date: Sat, 27 Sep 2025 09:56:50 -0700
Subject: [PATCH 1/4] comp_coding: trim README and remove dataset build/validate scripts

Signed-off-by: Brian Yu <bxyu@nvidia.com>
---
 resources_servers/comp_coding/README.md       |  68 +----
 .../comp_coding/scripts/build_examples.py     | 259 ------------------
 .../comp_coding/scripts/validate_dataset.py   | 236 ----------------
 3 files changed, 7 insertions(+), 556 deletions(-)
 delete mode 100644 resources_servers/comp_coding/scripts/build_examples.py
 delete mode 100644 resources_servers/comp_coding/scripts/validate_dataset.py

diff --git a/resources_servers/comp_coding/README.md b/resources_servers/comp_coding/README.md
index 5c94b2bba..2f354c5e5 100644
--- a/resources_servers/comp_coding/README.md
+++ b/resources_servers/comp_coding/README.md
@@ -2,7 +2,9 @@
 
 ### Overview
 Verifies competitive programming solutions by executing submitted code against unit tests. The server consumes agent trajectories and returns a reward based on whether the assistant's code produces the correct outputs for given test inputs.
-Model registry link: https://gitlab-master.nvidia.com/bxyu/nemo-gym/-/ml/models/53#/
+Model registry links:
+- Train: https://gitlab-master.nvidia.com/bxyu/nemo-gym/-/ml/models/80#/
+- Validation: https://gitlab-master.nvidia.com/bxyu/nemo-gym/-/ml/models/76#/
 
 ### Input schema
 - `responses_create_params`: OpenAI Responses create params
@@ -11,64 +13,21 @@ Model registry link: https://gitlab-master.nvidia.com/bxyu/nemo-gym/-/ml/models/
     - `unit_tests` (required): dict with `inputs` and `outputs` arrays containing test cases.
       - `inputs`: list of strings representing stdin input for each test case
       - `outputs`: list of strings representing expected stdout output for each test case
-    - `problem_id` (optional): unique identifier for the problem
 
 **Notes**
 - All test cases must pass for a solution to receive a reward of 1.0
 - Failed test cases result in a reward of 0.0 with detailed error information
 
 ### Test execution (for now)
-- Code is executed using Python's `exec()` function in a controlled environment
-- Each test case runs with redirected stdin/stdout:
-  - `stdin` is populated with the test input
-  - `stdout` is captured for comparison with expected output
-- Available built-ins include common functions: `input`, `print`, `range`, `len`, `int`, `str`, `list`, etc.
-- Newlines in test data are properly handled (converts `\\n` to actual newlines)
-
-### Example dataset row
-```json
-{
-    "responses_create_params": {
-        "input": [
-            {
-                "role": "user",
-                "content": "You are an expert competitive programmer. You will be given a problem statement and must output a complete Python solution that reads from stdin and writes to stdout.\n\nPolycarp has $n$ different binary words. A word called binary if it contains only characters '0' and '1'. For example, these words are binary: \"0001\", \"11\", \"0\" and \"0011100\".\n\nPolycarp wants to offer his set of $n$ binary words to play a game \"words\". In this game, players name words and each next word (starting from the second) must start with the last character of the previous word. The first word can be any. For example, these sequence of words can be named during the game: \"0101\", \"1\", \"10\", \"00\", \"00001\".\n\nWord reversal is the operation of reversing the order of the characters. For example, the word \"0111\" after the reversal becomes \"1110\", the word \"11010\" after the reversal becomes \"01011\".\n\nProbably, Polycarp has such a set of words that there is no way to put them in the order correspondent to the game rules. In this situation, he wants to reverse some words from his set so that:  the final set of $n$ words still contains different words (i.e. all words are unique);  there is a way to put all words of the final set of words in the order so that the final sequence of $n$ words is consistent with the game rules. \n\nPolycarp wants to reverse minimal number of words. Please, help him.\n\n\n-----Input-----\n\nThe first line of the input contains one integer $t$ ($1 \\le t \\le 10^4$) — the number of test cases in the input. Then $t$ test cases follow.\n\nThe first line of a test case contains one integer $n$ ($1 \\le n \\le 2\\cdot10^5$) — the number of words in the Polycarp's set. Next $n$ lines contain these words. All of $n$ words aren't empty and contains only characters '0' and '1'. The sum of word lengths doesn't exceed $4\\cdot10^6$. 
All words are different.\n\nGuaranteed, that the sum of $n$ for all test cases in the input doesn't exceed $2\\cdot10^5$. Also, guaranteed that the sum of word lengths for all test cases in the input doesn't exceed $4\\cdot10^6$.\n\n\n-----Output-----\n\nPrint answer for all of $t$ test cases in the order they appear.\n\nIf there is no answer for the test case, print -1. Otherwise, the first line of the output should contain $k$ ($0 \\le k \\le n$) — the minimal number of words in the set which should be reversed. The second line of the output should contain $k$ distinct integers — the indexes of the words in the set which should be reversed. Words are numerated from $1$ to $n$ in the order they appear. If $k=0$ you can skip this line (or you can print an empty line). If there are many answers you can print any of them.\n\n\n-----Example-----\nInput\n4\n4\n0001\n1000\n0011\n0111\n3\n010\n101\n0\n2\n00000\n00001\n4\n01\n001\n0001\n00001\n\nOutput\n1\n3 \n-1\n0\n\n2\n1 2"
-            }
-        ]
-    },
-    "verifier_metadata": {
-        "problem_id": "c69268d8bdb4da0685d7b187c88296c1",
-        "unit_tests": {
-            "inputs": ["4\n4\n0001\n1000\n0011\n0111\n3\n010\n101\n0\n2\n00000\n00001\n4\n01\n001\n0001\n00001\n"],
-            "outputs": ["1\n3 \n-1\n0\n\n2\n1 2 \n"]
-        }
-    }
-}
-```
+Test cases are run with the LiveCodeBench execution code in `lcb_integration/testing_util.py`.
 
 ### Example of rollouts and usage
 
 ```bash
-config_paths="responses_api_agents/simple_agent/configs/simple_agent.yaml,\
-responses_api_models/openai_model/configs/openai_model.yaml,\
-resources_servers/comp_coding/configs/comp_coding.yaml"
-
 # Running the server
-ng_run "+config_paths=[$config_paths]" \
-    +simple_agent.responses_api_agents.simple_agent.resources_server.name=comp_coding
-
-# Prepare example data for validation
-ng_prepare_data "+config_paths=[$config_paths]" \
-    +output_dirpath=resources_servers/comp_coding/data/ \
-    +mode=example_validation
-
-# Download train data from gitlab model registry
-ng_download_dataset_from_gitlab \
-    +dataset_name=comp_coding \
-    +version=0.1.1 \
-    +run_id=5a1167ef-3533-486f-9c0e-49d1e97fc887 \
-    +artifact_fpath=train.jsonl \
-    +output_fpath=resources_servers/comp_coding/data/train.jsonl
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml,\
+resources_servers/comp_coding/configs/comp_coding.yaml"
+ng_run "+config_paths=[$config_paths]"
 
 # Collect rollouts from example problems
 ng_collect_rollouts +agent_name=comp_coding_simple_agent \
@@ -77,18 +36,5 @@ ng_collect_rollouts +agent_name=comp_coding_simple_agent \
     +limit=null
 ```
 
-### Optional data preperation/validation scripts
-
-```bash
-# Build training dataset from collected examples
-uv run python resources_servers/comp_coding/scripts/build_examples.py \
-    --out resources_servers/comp_coding/data/train.jsonl \
-    --split train[:5000]
-
-# Validate and pre-process train dataset
-uv run python resources_servers/comp_coding/scripts/validate_dataset.py \
-    --in data/comp_coding/train.jsonl --fail-fast
-```
-
 ## Licensing information
 Apache 2.0
diff --git a/resources_servers/comp_coding/scripts/build_examples.py b/resources_servers/comp_coding/scripts/build_examples.py
deleted file mode 100644
index 8cc0d3044..000000000
--- a/resources_servers/comp_coding/scripts/build_examples.py
+++ /dev/null
@@ -1,259 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-r"""
-Build N rows of train/validation/example data from either:
-  1) a Hugging Face dataset (streaming), or
-  2) a local JSONL file (optionally .gz) with rows shaped like:
-     {
-       "hash_id": "...",
-       "question": "...",
-       "unit_tests": "{\"inputs\": [...], \"outputs\": [...]}",
-       ...
-     }
-     (The script tolerates both proper JSON and Python-dict-style lines with single quotes.)
-
-Each output row conforms to NeMo-Gym dataset requirements:
-- responses_create_params: OpenAI Responses-compatible input
-- verifier_metadata.unit_tests: {inputs: [...], outputs: [...]} (strings)
-
-Usage examples:
-  # From HF dataset (default):
-  uv run python resources_servers/comp_coding/scripts/build_examples.py \
-    --out resources_servers/comp_coding/data/example.jsonl \
-    --count 5000
-
-  # From a local JSONL:
-  uv run python resources_servers/comp_coding/scripts/build_examples.py \
-    --in-jsonl /path/to/source.jsonl \
-    --out resources_servers/comp_coding/data/example.jsonl \
-    --count 5000
-
-  # Pretty sample and gzip output:
-  uv run python resources_servers/comp_coding/scripts/build_examples.py \
-    --in-jsonl /path/to/source.jsonl.gz \
-    --out resources_servers/comp_coding/data/example.jsonl.gz \
-    --count 5000 \
-    --pretty-sample resources_servers/comp_coding/data/sample.json \
-    --pretty-k 10
-
-Sanity checks after writing:
-  jq -c . resources_servers/comp_coding/data/example.jsonl | wc -l
-  head -n 3 resources_servers/comp_coding/data/example.jsonl | jq .
-  awk 'NR==1{print; exit}' resources_servers/comp_coding/data/example.jsonl | tr -d '\n' | wc -c
-  grep -nP '\x{2028}|\x{2029}' resources_servers/comp_coding/data/example.jsonl || echo "No LS/PS found"
-"""
-
-import argparse
-import ast
-import gzip
-import json
-import re
-from itertools import islice
-from typing import Any, Dict, Iterable, List, Optional
-
-from datasets import load_dataset
-from tqdm import tqdm
-
-
-SYSTEM_PREFIX = (
-    "You are an expert competitive programmer. You will be given a problem statement "
-    "and must output a complete Python solution that reads from stdin and writes to stdout."
-)
-
-CODEFENCE_RE = re.compile(r"^```(?:\w+)?\s*|\s*```$", re.MULTILINE)
-
-
-def _strip_codefences(s: str) -> str:
-    return CODEFENCE_RE.sub("", s).strip()
-
-
-def _normalize_scalar(s: Any) -> str:
-    s = "" if s is None else str(s)
-    s = s.replace("\r\n", "\n").replace("\r", "\n")
-    return s.rstrip("\n").strip()
-
-
-def _normalize_list(lst: Any) -> List[str]:
-    if lst is None:
-        return []
-    if isinstance(lst, (str, bytes)):
-        lst = [lst]
-    if not isinstance(lst, list):
-        return []
-    out = []
-    for v in lst:
-        sv = _normalize_scalar(v)
-        sv = _strip_codefences(sv)
-        out.append(sv)
-    return out
-
-
-def _safe_literal_eval(s: str) -> Any:
-    try:
-        return ast.literal_eval(s)
-    except Exception:
-        return None
-
-
-def _parse_unit_tests(raw: Any) -> Dict[str, List[str]]:
-    parsed: Dict[str, Any] = {}
-    if isinstance(raw, dict):
-        parsed = raw
-    elif isinstance(raw, str) and raw.strip():
-        try:
-            parsed = json.loads(raw)
-        except Exception:
-            maybe = _safe_literal_eval(raw)
-            if isinstance(maybe, dict):
-                parsed = maybe
-            else:
-                parsed = {}
-    else:
-        parsed = {}
-
-    return {
-        "inputs": _normalize_list(parsed.get("inputs", [])),
-        "outputs": _normalize_list(parsed.get("outputs", [])),
-    }
-
-
-def make_row(q: str, unit_tests: Dict[str, List[str]], problem_id: Optional[str] = None) -> dict:
-    q_norm = _normalize_scalar(q)
-    return {
-        "responses_create_params": {"input": [{"role": "user", "content": f"{SYSTEM_PREFIX}\n\n{q_norm}"}]},
-        "verifier_metadata": {
-            "problem_id": _normalize_scalar(problem_id) if problem_id is not None else None,
-            "unit_tests": {
-                "inputs": unit_tests.get("inputs", []),
-                "outputs": unit_tests.get("outputs", []),
-            },
-        },
-    }
-
-
-def _open_out(path: str):
-    if path.endswith(".gz"):
-        return gzip.open(path, "wt", encoding="utf-8")
-    return open(path, "w", encoding="utf-8")
-
-
-def _open_in(path: str):
-    if path.endswith(".gz"):
-        return gzip.open(path, "rt", encoding="utf-8")
-    return open(path, "r", encoding="utf-8")
-
-
-def json_safe_dumps(obj: dict) -> str:
-    """
-    Dump JSON compactly and escape problematic Unicode line separators.
-    Ensures no raw U+2028/U+2029 appear in output (they become \\u2028/\\u2029).
-    """
-    s = json.dumps(obj, ensure_ascii=False, separators=(",", ":"))
-    # Escape LS (U+2028) and PS (U+2029)
-    return s.replace("\u2028", "\\u2028").replace("\u2029", "\\u2029")
-
-
-def stream_dataset(ds_name: str, split: str = "train") -> Iterable[dict]:
-    ds = load_dataset(ds_name, split=split, streaming=True)
-    for ex in ds:
-        yield ex
-
-
-def _parse_jsonl_line(line: str) -> Optional[dict]:
-    """
-    Robustly parse a JSONL line that might be:
-    - proper JSON (double quotes), or
-    - a Python dict-like string with single quotes.
-
-    Returns a dict or None if it can't be parsed.
-    """
-    s = line.strip()
-    if not s:
-        return None
-    # Try JSON first
-    try:
-        obj = json.loads(s)
-        if isinstance(obj, dict):
-            return obj
-    except Exception:
-        pass
-    # Try Python literal (single-quoted dicts, etc.)
-    maybe = _safe_literal_eval(s)
-    if isinstance(maybe, dict):
-        return maybe
-    return None
-
-
-def stream_jsonl(path: str) -> Iterable[dict]:
-    with _open_in(path) as f:
-        for raw in f:
-            obj = _parse_jsonl_line(raw)
-            if obj is None:
-                continue
-            yield obj
-
-
-def main():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--out", required=True, help="Output .jsonl or .jsonl.gz")
-    ap.add_argument("--count", type=int, default=5000, help="Number of rows to write")
-    ap.add_argument("--split", default="train", help="HF split name (default: train)")
-    ap.add_argument("--pretty-sample", default=None, help="Optional pretty JSON of first K rows")
-    ap.add_argument("--pretty-k", type=int, default=10, help="How many rows to pretty-print")
-    ap.add_argument("--ds-name", default="Nexusflow/comp_prog_filtered_no_function")
-    ap.add_argument(
-        "--in-jsonl",
-        default=None,
-        help="Optional input JSONL (.jsonl or .jsonl.gz). If provided, overrides HF dataset input.",
-    )
-    args = ap.parse_args()
-
-    rows_for_pretty = []
-    total = 0
-
-    # Choose input stream
-    if args.in_jsonl:
-        source_iter: Iterable[dict] = stream_jsonl(args.in_jsonl)
-    else:
-        source_iter = stream_dataset(args.ds_name, args.split)
-
-    with _open_out(args.out) as f:
-        for ex in tqdm(islice(source_iter, args.count), total=args.count):
-            q = ex.get("question", "")
-            raw_ut = ex.get("unit_tests", {}) or {}
-            ut = _parse_unit_tests(raw_ut)
-            pid = ex.get("hash_id")
-
-            row = make_row(q, ut, pid)
-            f.write(json_safe_dumps(row) + "\n")
-
-            if args.pretty_sample and len(rows_for_pretty) < args.pretty_k:
-                rows_for_prety_limit = args.pretty_k  # alias to clarify intent
-                if len(rows_for_pretty) < rows_for_prety_limit:
-                    rows_for_pretty.append(row)
-
-            total += 1
-
-    if args.pretty_sample and rows_for_pretty:
-        with open(args.pretty_sample, "w", encoding="utf-8") as ps:
-            json.dump(rows_for_pretty, ps, ensure_ascii=False, indent=2)
-
-    print(f"wrote {total} rows to {args.out}")
-    if args.pretty_sample:
-        print(f"wrote pretty sample ({len(rows_for_pretty)} rows) to {args.pretty_sample}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/resources_servers/comp_coding/scripts/validate_dataset.py b/resources_servers/comp_coding/scripts/validate_dataset.py
deleted file mode 100644
index bcd3280c2..000000000
--- a/resources_servers/comp_coding/scripts/validate_dataset.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-r"""
-Validate and (optionally) normalize comp_coding JSONL datasets **before** runtime.
-
-What it does:
-- Ensures each row has:
-  - responses_create_params.input (non-empty)
-  - verifier_metadata.unit_tests with:
-      - "inputs":  list[str] (non-empty)
-      - "outputs": list[str] (same length as inputs)
-- Optionally coerces stringified unit_tests into a dict (e.g., when stored as JSON string)
-- Optionally normalizes newlines by converting literal "\\n" to "\n" (inputs/outputs)
-- Can write out a cleaned JSONL (dropping bad rows or failing fast)
-
-Usage:
-  uv run python resources_servers/comp_coding/scripts/validate_dataset.py \
-    --in data/comp_coding/train.jsonl --fail-fast
-
-  uv run python resources_servers/comp_coding/scripts/validate_dataset.py \
-    --in data/comp_coding/train.jsonl \
-    --out data/comp_coding/train.cleaned.jsonl \
-    --autofix --normalize-newlines --drop-bad
-
-CLI flags:
-  --in PATH [--in PATH ...]         One or more JSONL files to validate
-  --out PATH                        Where to write a cleaned JSONL (optional)
-  --autofix                         Try to parse stringified unit_tests to dict
-  --normalize-newlines              Replace literal "\\n" with "\n" in tests
-  --fail-fast                       Stop at first error (default: keep scanning)
-  --drop-bad                        When --out is set, skip invalid rows instead of failing
-  --pretty-sample PATH              Write a small pretty-printed sample (first 100 ok rows)
-
-Exit codes:
-  0 on success (or successful write when --out provided)
-  1 on validation error (unless --drop-bad used and output written)
-"""
-
-import argparse
-import json
-from pathlib import Path
-from typing import Any, Dict, List, Tuple
-
-
-def _is_list_of_str(x: Any) -> bool:
-    return isinstance(x, list) and all(isinstance(s, str) for s in x)
-
-
-def _maybe_parse_unit_tests(ut: Any, autofix: bool) -> Dict[str, Any]:
-    if isinstance(ut, dict):
-        return ut
-    if isinstance(ut, str) and autofix:
-        # Try strict JSON first
-        try:
-            parsed = json.loads(ut)
-            if isinstance(parsed, dict):
-                return parsed
-        except Exception:
-            pass
-        # Try lenient: strip whitespace and single quotes
-        try:
-            s = ut.strip().replace("'", '"')
-            parsed = json.loads(s)
-            if isinstance(parsed, dict):
-                return parsed
-        except Exception:
-            pass
-    raise ValueError("unit_tests must be a dict (or a JSON string if --autofix).")
-
-
-def _normalize_newlines_in_tests(ut: Dict[str, Any]) -> Dict[str, Any]:
-    def fix(s: str) -> str:
-        return s.replace("\\n", "\n")
-
-    inputs = ut.get("inputs", [])
-    outputs = ut.get("outputs", [])
-    if isinstance(inputs, list):
-        inputs = [fix(s) if isinstance(s, str) else s for s in inputs]
-    if isinstance(outputs, list):
-        outputs = [fix(s) if isinstance(s, str) else s for s in outputs]
-    ut["inputs"] = inputs
-    ut["outputs"] = outputs
-    return ut
-
-
-def _validate_unit_tests(ut: Dict[str, Any]) -> Tuple[bool, str]:
-    inputs = ut.get("inputs")
-    outputs = ut.get("outputs")
-    if not _is_list_of_str(inputs):
-        return False, "unit_tests.inputs must be list[str] and non-empty"
-    if not _is_list_of_str(outputs):
-        return False, "unit_tests.outputs must be list[str]"
-    if len(inputs) == 0:
-        return False, "unit_tests.inputs cannot be empty"
-    if len(inputs) != len(outputs):
-        return False, f"inputs/outputs length mismatch: {len(inputs)} vs {len(outputs)}"
-    return True, "ok"
-
-
-def _validate_row(
-    row: Dict[str, Any], idx: int, autofix: bool, normalize_newlines: bool
-) -> Tuple[bool, Dict[str, Any], str]:
-    # responses_create_params sanity
-    rcp = row.get("responses_create_params")
-    if not isinstance(rcp, dict):
-        return False, row, "missing responses_create_params"
-    input_blocks = rcp.get("input")
-    if not isinstance(input_blocks, list) or len(input_blocks) == 0:
-        return False, row, "responses_create_params.input must be a non-empty list"
-
-    # unit_tests presence + structure
-    vm = row.get("verifier_metadata")
-    if not isinstance(vm, dict):
-        return False, row, "missing verifier_metadata"
-    if "unit_tests" not in vm:
-        return False, row, "missing verifier_metadata.unit_tests"
-
-    try:
-        ut = _maybe_parse_unit_tests(vm["unit_tests"], autofix=autofix)
-    except Exception as e:
-        return False, row, f"unit_tests parse error: {e}"
-
-    if normalize_newlines:
-        ut = _normalize_newlines_in_tests(ut)
-
-    ok, msg = _validate_unit_tests(ut)
-    if not ok:
-        return False, row, msg
-
-    # If we fixed ut, write it back normalized
-    vm["unit_tests"] = {"inputs": ut["inputs"], "outputs": ut["outputs"]}
-    row["verifier_metadata"] = vm
-    return True, row, "ok"
-
-
-def main():
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--in", dest="inputs", nargs="+", required=True)
-    ap.add_argument("--out", dest="out", type=str, default=None)
-    ap.add_argument("--autofix", action="store_true")
-    ap.add_argument("--normalize-newlines", action="store_true")
-    ap.add_argument("--fail-fast", action="store_true")
-    ap.add_argument("--drop-bad", action="store_true")
-    ap.add_argument("--pretty-sample", type=str, default=None)
-    args = ap.parse_args()
-
-    in_paths = [Path(p) for p in args.inputs]
-    out_path = Path(args.out) if args.out else None
-    sample_path = Path(args.pretty_sample) if args.pretty_sample else None
-
-    if out_path:
-        out_path.parent.mkdir(parents=True, exist_ok=True)
-    if sample_path:
-        sample_path.parent.mkdir(parents=True, exist_ok=True)
-
-    total = 0
-    ok_count = 0
-    bad_count = 0
-    written = 0
-    sample: List[Dict[str, Any]] = []
-
-    out_f = open(out_path, "w", encoding="utf-8") if out_path else None
-    try:
-        for in_file in in_paths:
-            with open(in_file, "r", encoding="utf-8") as f:
-                for line_idx, line in enumerate(f, start=1):
-                    line = line.strip()
-                    if not line:
-                        continue
-                    total += 1
-                    try:
-                        row = json.loads(line)
-                    except Exception as e:
-                        bad_count += 1
-                        msg = f"{in_file}:{line_idx} invalid JSON: {e}"
-                        if args.fail_fast:
-                            raise SystemExit(msg)
-                        else:
-                            print("ERROR:", msg)
-                            continue
-
-                    ok, fixed, msg = _validate_row(
-                        row,
-                        total,
-                        autofix=args.autofix,
-                        normalize_newlines=args.normalize_newlines,
-                    )
-                    if ok:
-                        ok_count += 1
-                        if out_f:
-                            out_f.write(json.dumps(fixed, ensure_ascii=False) + "\n")
-                            written += 1
-                        if len(sample) < 100 and sample_path:
-                            sample.append(fixed)
-                    else:
-                        bad_count += 1
-                        if args.fail_fast and not args.drop_bad:
-                            raise SystemExit(f"{in_file}:{line_idx} {msg}")
-                        print("ERROR:", f"{in_file}:{line_idx}", msg)
-                        if out_f and args.drop_bad:
-                            # skip writing this row
-                            pass
-                        elif out_f and not args.drop_bad:
-                            # fail the whole run if we plan to produce a cleaned file but don’t drop bad rows
-                            raise SystemExit(
-                                f"Refusing to write invalid row without --drop-bad: {in_file}:{line_idx} {msg}"
-                            )
-
-        if sample_path and sample:
-            with open(sample_path, "w", encoding="utf-8") as s:
-                json.dump(sample, s, ensure_ascii=False, indent=2)
-
-        print(f"Scanned rows: {total} | OK: {ok_count} | Bad: {bad_count}")
-        if out_f:
-            print(f"Wrote cleaned rows: {written} -> {out_path}")
-        if bad_count and not (out_f and args.drop_bad):
-            raise SystemExit(1)
-    finally:
-        if out_f:
-            out_f.close()
-
-
-if __name__ == "__main__":
-    main()

From d7b9f494ed7147d31cf40cda7b58bc9c5dd12fec Mon Sep 17 00:00:00 2001
From: Brian Yu <bxyu@nvidia.com>
Date: Mon, 29 Sep 2025 10:34:55 -0700
Subject: [PATCH 2/4] set soft limit

Signed-off-by: Brian Yu <bxyu@nvidia.com>
---
 .../lcb_integration/testing_util.py           | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/resources_servers/comp_coding/lcb_integration/testing_util.py b/resources_servers/comp_coding/lcb_integration/testing_util.py
index a9b7c0dbb..522d7a3d2 100644
--- a/resources_servers/comp_coding/lcb_integration/testing_util.py
+++ b/resources_servers/comp_coding/lcb_integration/testing_util.py
@@ -440,7 +440,7 @@ def run_test(sample, test=None, debug=False, timeout=6):
 
     # Disable functionalities that can make destructive changes to the test.
     # max memory is set to 4GB
-    reliability_guard()
+    reliability_guard(4 * 1024**3)
 
     if debug:
         print(f"start = {datetime.now().time()}")
@@ -511,6 +511,17 @@ def run_test(sample, test=None, debug=False, timeout=6):
                 signal.alarm(0)
 
 
+def _set_resource_limit(resource_type: str, target_soft_limit):
+    import resource
+
+    current_soft, current_hard = resource.getrlimit(resource_type)
+    if current_soft < target_soft_limit:
+        try:
+            resource.setrlimit(resource_type, (target_soft_limit, current_hard))
+        except ValueError as e:
+            print(repr(e))
+
+
 def reliability_guard(maximum_memory_bytes=None):
     """
     This disables various destructive functions and prevents the generated code
@@ -526,10 +537,11 @@ def reliability_guard(maximum_memory_bytes=None):
     if maximum_memory_bytes is not None:
         import resource
 
-        resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
-        resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
+        _set_resource_limit(resource.RLIMIT_AS, maximum_memory_bytes)
+        _set_resource_limit(resource.RLIMIT_DATA, maximum_memory_bytes)
+
         if not platform.uname().system == "Darwin":
-            resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
+            _set_resource_limit(resource.RLIMIT_STACK, maximum_memory_bytes)
 
     faulthandler.disable()
 

From de32859168cee3c91989375de31f17b938512e07 Mon Sep 17 00:00:00 2001
From: Brian Yu <bxyu@nvidia.com>
Date: Mon, 29 Sep 2025 11:03:10 -0700
Subject: [PATCH 3/4] print limit set

Signed-off-by: Brian Yu <bxyu@nvidia.com>
---
 resources_servers/comp_coding/lcb_integration/testing_util.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/resources_servers/comp_coding/lcb_integration/testing_util.py b/resources_servers/comp_coding/lcb_integration/testing_util.py
index 522d7a3d2..b731e41aa 100644
--- a/resources_servers/comp_coding/lcb_integration/testing_util.py
+++ b/resources_servers/comp_coding/lcb_integration/testing_util.py
@@ -515,6 +515,9 @@ def _set_resource_limit(resource_type: str, target_soft_limit):
     import resource
 
     current_soft, current_hard = resource.getrlimit(resource_type)
+    print(
+        f"current_soft: {current_soft} current_hard: {current_hard} target_soft_limit: {target_soft_limit} current_soft < target_soft_limit: {current_soft < target_soft_limit}"
+    )
     if current_soft < target_soft_limit:
         try:
             resource.setrlimit(resource_type, (target_soft_limit, current_hard))

From a82b69a79e366b04a3baad7679f8013f397daf5b Mon Sep 17 00:00:00 2001
From: Brian Yu <bxyu@nvidia.com>
Date: Mon, 29 Sep 2025 11:24:33 -0700
Subject: [PATCH 4/4] remove print

Signed-off-by: Brian Yu <bxyu@nvidia.com>
---
 resources_servers/comp_coding/lcb_integration/testing_util.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/resources_servers/comp_coding/lcb_integration/testing_util.py b/resources_servers/comp_coding/lcb_integration/testing_util.py
index b731e41aa..522d7a3d2 100644
--- a/resources_servers/comp_coding/lcb_integration/testing_util.py
+++ b/resources_servers/comp_coding/lcb_integration/testing_util.py
@@ -515,9 +515,6 @@ def _set_resource_limit(resource_type: str, target_soft_limit):
     import resource
 
     current_soft, current_hard = resource.getrlimit(resource_type)
-    print(
-        f"current_soft: {current_soft} current_hard: {current_hard} target_soft_limit: {target_soft_limit} current_soft < target_soft_limit: {current_soft < target_soft_limit}"
-    )
     if current_soft < target_soft_limit:
         try:
             resource.setrlimit(resource_type, (target_soft_limit, current_hard))