From 9e83643af325ff1f8a52abbf754feb44efdc9f68 Mon Sep 17 00:00:00 2001 From: Alok-Ranjan23 Date: Wed, 6 May 2026 11:00:02 +0000 Subject: [PATCH 1/2] QVAC-17892 fix(registry): drop phantom Bergamot pairs and stale enja vocab entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up patch to PR #1785. The DHT registry sync after that PR landed flagged 5 failing Bergamot S3 entries (visible in Slack #1785 thread on May 6: bergamot-enja/.../vocab.enja.spm, bergamot-enko/.../vocab.enko.spm, and 3× bergamot-fire/...). Cross-checking models.prod.json against Mozilla's Firefox-Translations Remote Settings — the same upstream the runtime bergamot-model-fetcher.js queries — shows two distinct issues: 1. Three pairs in the manifest don't exist upstream at all: - "fire" — Mozilla has no `re` language anywhere in the translations-models collection. The manifest description was "Bergamot ... fi-re" but `fi` only pairs with `en` upstream. - "enzh" — Mozilla doesn't ship an English→Chinese pair. - "zhen" — Mozilla doesn't ship Chinese→English either. None of these entries can ever sync because the bytes were never created. 2. One stale entry references a file Mozilla doesn't ship for that specific pair: - bergamot-enja/.../vocab.enja.spm — Mozilla only publishes srcvocab.enja.spm + trgvocab.enja.spm for enja (the CJK split-vocab convention). The split entries are already in the manifest correctly; this is a leftover combined entry that should never have been added. Net change: -14 manifest entries (13 phantom + 1 stale), zero entries replaced, zero entries added. All 666 remaining bergamot-* entries correspond to real files Mozilla actually publishes (or `metadata.json` sidecars that our team uploads alongside). What this PR deliberately does NOT touch: - bergamot-enko/.../vocab.enko.spm — also flagged in the failing sync output. Mozilla DOES ship combined `vocab.enko.spm` upstream alongside split, so the manifest entry is theoretically valid. Failure is more likely a date / S3-upload mismatch on our side. Waiting on @Yury Samarin's S3 verification before deciding whether to drop the combined entry or keep it. Independent fix. - 90 `metadata.json` entries that Mozilla doesn't ship. These are custom files our team uploads to S3 alongside Mozilla's bytes (Yury's screenshot confirmed metadata.json IS present in our S3 for enja). Manifest is correct. Validation: - JSON syntax check: clean - Re-running the Mozilla cross-check after the edit: 0 phantom pairs and 0 vocab-naming mismatches remaining - 274 manifest entries (~half) are exact filename matches against Mozilla upstream - 91 remaining "wrong_filename" hits are all metadata.json (expected, custom internal file) Refs QVAC-17892, PR #1785. Co-authored-by: Cursor --- .../data/models.prod.json | 224 ------------------ 1 file changed, 224 deletions(-) diff --git a/packages/qvac-lib-registry-server/data/models.prod.json b/packages/qvac-lib-registry-server/data/models.prod.json index 4e279e4b41..b462e38890 100644 --- a/packages/qvac-lib-registry-server/data/models.prod.json +++ b/packages/qvac-lib-registry-server/data/models.prod.json @@ -2721,86 +2721,6 @@ "notes": "", "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enru" }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enzh/2025-12-18/lex.50.50.enzh.s2t.bin", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot lexical shortlist en-zh", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enzh" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enzh/2025-12-18/metadata.json", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot metadata en-zh", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enzh" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enzh/2025-12-18/model.enzh.intgemm.alphas.bin", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot NMT model en-zh", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enzh" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enzh/2025-12-18/srcvocab.enzh.spm", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot vocabulary en-zh", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enzh" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enzh/2025-12-18/trgvocab.enzh.spm", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot vocabulary en-zh", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enzh" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enzh" - }, { "source": "s3:///qvac_models_compiled/bergamot/bergamot-esen/2026-04-28/lex.50.50.esen.s2t.bin", "engine": "@qvac/translation-nmtcpp", @@ -3185,70 +3105,6 @@ "notes": "", "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/tiny/ruen" }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-zhen/2025-12-18/lex.50.50.zhen.s2t.bin", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot lexical shortlist zh-en", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "zhen" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/zhen" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-zhen/2025-12-18/metadata.json", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot metadata zh-en", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "zhen" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/zhen" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-zhen/2025-12-18/model.zhen.intgemm.alphas.bin", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot NMT model zh-en", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "zhen" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/zhen" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-zhen/2025-12-18/vocab.zhen.spm", - "engine": "@qvac/translation-nmtcpp", - "description": "Bergamot vocabulary zh-en", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "zhen" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/zhen" - }, { "source": "https://huggingface.co/mav23/Llama_3.2_1B_Intruct_Tool_Calling_V2-GGUF/blob/main/llama_3.2_1b_intruct_tool_calling_v2.Q4_K.gguf", "engine": "@qvac/llm-llamacpp", @@ -7496,22 +7352,6 @@ "notes": "", "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enis" }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enja/2025-12-18/vocab.enja.spm", - "description": "Bergamot vocabulary en-ja", - "engine": "@qvac/translation-nmtcpp", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "enja" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enja" - }, { "source": "s3:///qvac_models_compiled/bergamot/bergamot-enkn/2025-12-18/lex.50.50.enkn.s2t.bin", "description": "Bergamot lexical shortlist en-kn", @@ -8792,70 +8632,6 @@ "notes": "", "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/fien" }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-fire/2025-12-18/lex.50.50.fire.s2t.bin", - "description": "Bergamot lexical shortlist fi-re", - "engine": "@qvac/translation-nmtcpp", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "fire" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/fire" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-fire/2025-12-18/metadata.json", - "description": "Bergamot metadata fi-re", - "engine": "@qvac/translation-nmtcpp", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "fire" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/fire" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-fire/2025-12-18/model.fire.intgemm.alphas.bin", - "description": "Bergamot NMT model fi-re", - "engine": "@qvac/translation-nmtcpp", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "fire" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/fire" - }, - { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-fire/2025-12-18/vocab.fire.spm", - "description": "Bergamot vocabulary fi-re", - "engine": "@qvac/translation-nmtcpp", - "quantization": "", - "params": "", - "licenseId": "MPL-2.0", - "tags": [ - "translation", - "nmt", - "bergamot", - "fire" - ], - "notes": "", - "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/fire" - }, { "source": "s3:///qvac_models_compiled/bergamot/bergamot-guen/2025-12-18/lex.50.50.guen.s2t.bin", "description": "Bergamot lexical shortlist gu-en", From 5ca208dde77bf56ac71651a773720598b2e91613 Mon Sep 17 00:00:00 2001 From: Alok-Ranjan23 Date: Wed, 6 May 2026 11:43:58 +0000 Subject: [PATCH 2/2] QVAC-17892 fix(registry): replace combined enko vocab with split src/trg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to the previous commit (9e83643a) on this branch. Yury confirmed via `aws s3 ls` that bergamot-enko/2025-12-18/ contains srcvocab.enko.spm + trgvocab.enko.spm and no combined vocab.enko.spm. This matches Mozilla's current upstream — they migrated enko from combined-vocab to split-vocab in their Remote Settings on 2025-07-22, and our team mirrored the post-migration layout to S3. Net change in this commit: - drop bergamot-enko/2025-12-18/vocab.enko.spm - add bergamot-enko/2025-12-18/srcvocab.enko.spm - add bergamot-enko/2025-12-18/trgvocab.enko.spm Same shape as the existing enja split entries already in this file (cloned from them for consistency: same description, engine, licenseId, tags, link). Refs QVAC-17892, PR #1785, this PR #1919. Co-authored-by: Cursor --- .../data/models.prod.json | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/qvac-lib-registry-server/data/models.prod.json b/packages/qvac-lib-registry-server/data/models.prod.json index b462e38890..6909845ba9 100644 --- a/packages/qvac-lib-registry-server/data/models.prod.json +++ b/packages/qvac-lib-registry-server/data/models.prod.json @@ -7465,7 +7465,23 @@ "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enko" }, { - "source": "s3:///qvac_models_compiled/bergamot/bergamot-enko/2025-12-18/vocab.enko.spm", + "source": "s3:///qvac_models_compiled/bergamot/bergamot-enko/2025-12-18/srcvocab.enko.spm", + "description": "Bergamot vocabulary en-ko", + "engine": "@qvac/translation-nmtcpp", + "quantization": "", + "params": "", + "licenseId": "MPL-2.0", + "tags": [ + "translation", + "nmt", + "bergamot", + "enko" + ], + "notes": "", + "link": "https://github.com/mozilla/firefox-translations-models/tree/main/models/base-memory/enko" + }, + { + "source": "s3:///qvac_models_compiled/bergamot/bergamot-enko/2025-12-18/trgvocab.enko.spm", "description": "Bergamot vocabulary en-ko", "engine": "@qvac/translation-nmtcpp", "quantization": "",