From 4aea0d3f2fb7d928be66e18c2093e016c61532aa Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Balusu Date: Tue, 24 Mar 2026 22:18:15 -0400 Subject: [PATCH 1/3] fix: prevent UnicodeEncodeError on Windows CP1252 consoles in studio setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows, `unsloth studio setup` crashes with a UnicodeEncodeError when install_python_stack.py tries to print Unicode status glyphs (✅, ❌, ⚠️) to a console that uses a legacy code page like CP1252. Add a _safe_print() helper that catches UnicodeEncodeError and gracefully degrades emoji to ASCII equivalents ([OK], [FAIL], [!]). Replace all print() calls that emit Unicode glyphs with _safe_print(). Fixes #4509 --- studio/install_python_stack.py | 39 +++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/studio/install_python_stack.py b/studio/install_python_stack.py index a141c64425..0880f69c0a 100644 --- a/studio/install_python_stack.py +++ b/studio/install_python_stack.py @@ -44,6 +44,35 @@ SCRIPT_DIR / "backend" / "plugins" / "data-designer-unstructured-seed" ) +# ── Unicode-safe printing ───────────────────────────────────────────── +# On Windows the default console encoding can be a legacy code page +# (e.g. CP1252) that cannot represent Unicode glyphs such as ✅ or ❌. +# _safe_print() gracefully degrades to ASCII equivalents so the +# installer never crashes just because of a status glyph. 
+ +_UNICODE_TO_ASCII: dict[str, str] = { + "\u2705": "[OK]", # ✅ + "\u274c": "[FAIL]", # ❌ + "\u26a0\ufe0f": "[!]", # ⚠️ (warning + variation selector) + "\u26a0": "[!]", # ⚠ (warning without variation selector) +} + + +def _safe_print(*args: object, **kwargs: object) -> None: + """Drop-in print() replacement that survives non-UTF-8 consoles.""" + try: + print(*args, **kwargs) + except UnicodeEncodeError: + # Stringify, then swap emoji for ASCII equivalents + text = " ".join(str(a) for a in args) + for uni, ascii_alt in _UNICODE_TO_ASCII.items(): + text = text.replace(uni, ascii_alt) + # Final fallback: replace any remaining unencodable chars + print(text.encode(sys.stdout.encoding or "ascii", errors="replace").decode( + sys.stdout.encoding or "ascii", errors="replace" + ), **kwargs) + + # ── Color support ────────────────────────────────────────────────────── @@ -119,7 +148,7 @@ def run( stderr = subprocess.STDOUT if quiet else None, ) if result.returncode != 0: - print(_red(f"❌ {label} failed (exit code {result.returncode}):")) + _safe_print(_red(f"❌ {label} failed (exit code {result.returncode}):")) if result.stdout: print(result.stdout.decode(errors = "replace")) sys.exit(result.returncode) @@ -267,7 +296,7 @@ def patch_package_file(package_name: str, relative_path: str, url: str) -> None: text = True, ) if result.returncode != 0: - print(_red(f" ⚠️ Could not find package {package_name}, skipping patch")) + _safe_print(_red(f" ⚠️ Could not find package {package_name}, skipping patch")) return location = None @@ -277,7 +306,7 @@ def patch_package_file(package_name: str, relative_path: str, url: str) -> None: break if not location: - print(_red(f" ⚠️ Could not determine location of {package_name}")) + _safe_print(_red(f" ⚠️ Could not determine location of {package_name}")) return dest = Path(location) / relative_path @@ -393,7 +422,7 @@ def install_python_stack() -> int: # 11. 
Local Data Designer seed plugin if not LOCAL_DD_UNSTRUCTURED_PLUGIN.is_dir(): - print( + _safe_print( _red( f"❌ Missing local plugin directory: {LOCAL_DD_UNSTRUCTURED_PLUGIN}", ), @@ -422,7 +451,7 @@ def install_python_stack() -> int: stderr = subprocess.DEVNULL, ) - print(_green("✅ Python dependencies installed")) + _safe_print(_green("✅ Python dependencies installed")) return 0 From 23b1127bae55b34d02affd8ddfab5d47fb300810 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Mar 2026 02:19:03 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- studio/install_python_stack.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/studio/install_python_stack.py b/studio/install_python_stack.py index 0880f69c0a..7ae29ee2d3 100644 --- a/studio/install_python_stack.py +++ b/studio/install_python_stack.py @@ -51,10 +51,10 @@ # installer never crashes just because of a status glyph. 
_UNICODE_TO_ASCII: dict[str, str] = { - "\u2705": "[OK]", # ✅ - "\u274c": "[FAIL]", # ❌ + "\u2705": "[OK]", # ✅ + "\u274c": "[FAIL]", # ❌ "\u26a0\ufe0f": "[!]", # ⚠️ (warning + variation selector) - "\u26a0": "[!]", # ⚠ (warning without variation selector) + "\u26a0": "[!]", # ⚠ (warning without variation selector) } @@ -68,9 +68,12 @@ def _safe_print(*args: object, **kwargs: object) -> None: for uni, ascii_alt in _UNICODE_TO_ASCII.items(): text = text.replace(uni, ascii_alt) # Final fallback: replace any remaining unencodable chars - print(text.encode(sys.stdout.encoding or "ascii", errors="replace").decode( - sys.stdout.encoding or "ascii", errors="replace" - ), **kwargs) + print( + text.encode(sys.stdout.encoding or "ascii", errors = "replace").decode( + sys.stdout.encoding or "ascii", errors = "replace" + ), + **kwargs, + ) # ── Color support ────────────────────────────────────────────────────── @@ -296,7 +299,9 @@ def patch_package_file(package_name: str, relative_path: str, url: str) -> None: text = True, ) if result.returncode != 0: - _safe_print(_red(f" ⚠️ Could not find package {package_name}, skipping patch")) + _safe_print( + _red(f" ⚠️ Could not find package {package_name}, skipping patch") + ) return location = None From e259939b78df62ff29ec004ca43774c9a2217a88 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Wed, 25 Mar 2026 05:03:31 +0000 Subject: [PATCH 3/3] Replace Unicode dashes with ASCII in install_python_stack.py Box-drawing chars (U+2500) in section dividers are themselves not representable on CP1252; em dashes (U+2014) in comments are representable there (byte 0x97) but are still non-ASCII -- replace both with plain ASCII dashes for consistency with the fix.
--- studio/install_python_stack.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/studio/install_python_stack.py b/studio/install_python_stack.py index 7ae29ee2d3..9b2678f478 100644 --- a/studio/install_python_stack.py +++ b/studio/install_python_stack.py @@ -22,20 +22,20 @@ IS_WINDOWS = sys.platform == "win32" -# ── Verbosity control ────────────────────────────────────────────────────────── +# -- Verbosity control ---------------------------------------------------------- # By default the installer shows a minimal progress bar (one line, in-place). # Set UNSLOTH_VERBOSE=1 in the environment to restore full per-step output: # Linux/Mac: UNSLOTH_VERBOSE=1 ./studio/setup.sh # Windows: $env:UNSLOTH_VERBOSE="1" ; .\studio\setup.ps1 VERBOSE: bool = os.environ.get("UNSLOTH_VERBOSE", "0") == "1" -# Progress bar state — updated by _progress() as each install step runs. +# Progress bar state -- updated by _progress() as each install step runs. # _TOTAL counts: pip-upgrade + 7 shared steps + triton (non-Windows) + local-plugin + finalize # Update _TOTAL here if you add or remove install steps in install_python_stack(). _STEP: int = 0 _TOTAL: int = 0 # set at runtime in install_python_stack() based on platform -# ── Paths ────────────────────────────────────────────────────────────── +# -- Paths -------------------------------------------------------------- SCRIPT_DIR = Path(__file__).resolve().parent REQ_ROOT = SCRIPT_DIR / "backend" / "requirements" SINGLE_ENV = REQ_ROOT / "single-env" @@ -44,7 +44,7 @@ SCRIPT_DIR / "backend" / "plugins" / "data-designer-unstructured-seed" ) -# ── Unicode-safe printing ───────────────────────────────────────────── +# -- Unicode-safe printing --------------------------------------------- # On Windows the default console encoding can be a legacy code page # (e.g. CP1252) that cannot represent Unicode glyphs such as ✅ or ❌. 
# _safe_print() gracefully degrades to ASCII equivalents so the @@ -76,7 +76,7 @@ def _safe_print(*args: object, **kwargs: object) -> None: ) -# ── Color support ────────────────────────────────────────────────────── +# -- Color support ------------------------------------------------------ def _enable_colors() -> bool: @@ -104,7 +104,7 @@ def _enable_colors() -> bool: return True # Unix terminals support ANSI by default -# Colors disabled — Colab and most CI runners render ANSI fine, but plain output +# Colors disabled -- Colab and most CI runners render ANSI fine, but plain output # is cleaner in the notebook cell. Re-enable by setting _HAS_COLOR = _enable_colors() _HAS_COLOR = False @@ -124,7 +124,7 @@ def _red(msg: str) -> str: def _progress(label: str) -> None: """Print an in-place progress bar for the current install step. - Uses only stdlib (sys.stdout) — no extra packages required. + Uses only stdlib (sys.stdout) -- no extra packages required. In VERBOSE mode this is a no-op; per-step labels are printed by run() instead. """ global _STEP @@ -161,7 +161,7 @@ def run( # Packages to skip on Windows (require special build steps) WINDOWS_SKIP_PACKAGES = {"open_spiel", "triton_kernels"} -# ── uv bootstrap ────────────────────────────────────────────────────── +# -- uv bootstrap ------------------------------------------------------ USE_UV = False # Set by _bootstrap_uv() at the start of install_python_stack() UV_NEEDS_SYSTEM = False # Set by _bootstrap_uv() via probe @@ -319,7 +319,7 @@ def patch_package_file(package_name: str, relative_path: str, url: str) -> None: download_file(url, dest) -# ── Main install sequence ───────────────────────────────────────────── +# -- Main install sequence --------------------------------------------- def install_python_stack() -> int: @@ -350,7 +350,7 @@ def install_python_stack() -> int: req = REQ_ROOT / "extras.txt", ) - # 3b. Extra dependencies (no-deps) — audio model support etc. + # 3b. 
Extra dependencies (no-deps) -- audio model support etc. _progress("extra codecs") pip_install( "Installing extras (no-deps)", @@ -359,7 +359,7 @@ def install_python_stack() -> int: req = REQ_ROOT / "extras-no-deps.txt", ) - # 4. Overrides (torchao, transformers) — force-reinstall + # 4. Overrides (torchao, transformers) -- force-reinstall _progress("dependency overrides") pip_install( "Installing dependency overrides",