From 1f7ab70cd4dbb64e16bb6b38840490c2f2259cb0 Mon Sep 17 00:00:00 2001
From: Evgeny Pavlov <epavlov@mozilla.com>
Date: Thu, 24 Oct 2024 15:39:23 -0700
Subject: [PATCH] Disable bicleaner hard rules completely (#892)

---
 pipeline/bicleaner/bicleaner.sh | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/pipeline/bicleaner/bicleaner.sh b/pipeline/bicleaner/bicleaner.sh
index de075533b..af7920372 100755
--- a/pipeline/bicleaner/bicleaner.sh
+++ b/pipeline/bicleaner/bicleaner.sh
@@ -49,12 +49,6 @@ else
     export scol=2
     export tcol=1
   fi
-  # disable hard rules for multilingual model
-  if [ ${model_source_lang} == "xx" ] || [ ${model_target_lang} == "xx" ]; then
-    export hardrules="--disable_hardrules"
-  else
-    export hardrules=""
-  fi
 
   #Export cuda visible devices if empty or not set
   if [ -z "${CUDA_VISIBLE_DEVICES:-}" ]; then
@@ -76,7 +70,7 @@ else
                # to operate on the CPU very slowly. To guard against this wasting expensive
                # GPU time, always check that it can find GPUs.
                python3 -c "import tensorflow; exit(0) if tensorflow.config.list_physical_devices('GPU') else exit(9001)"
-               bicleaner-ai-classify ${hardrules} --scol ${scol} --tcol ${tcol} - - $1
+               bicleaner-ai-classify --disable_hardrules --scol ${scol} --tcol ${tcol} - - $1
        }
        export -f biclean
        # {%} is a 1-indexed job slot number from GNU parallel.  We use that as the 1-indexed offset in CUDA_VISIBLE_ARRAY
@@ -86,7 +80,7 @@ else
   else
    export BICLEANER_AI_THREADS=${threads}
    paste <(zstdmt -dc "${corpus_prefix}.${SRC}.zst") <(zstdmt -dc "${corpus_prefix}.${TRG}.zst") |
-     bicleaner-ai-classify ${hardrules} --scol ${scol} --tcol ${tcol} "${threads}"  - - "${pack_dir}"/*.yaml |
+     bicleaner-ai-classify --disable_hardrules --scol ${scol} --tcol ${tcol} "${threads}"  - - "${pack_dir}"/*.yaml |
      zstdmt >"${output_prefix}.scored.zst"
   fi