Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
978d8ff
Add ICU per-item zstd decompression hook for compressed libicudata
dylan-conway May 22, 2026
c7e3b99
[autofix.ci] apply automated fixes
autofix-ci[bot] May 22, 2026
1a9a633
Use ZSTD_MAGICNUMBER constant instead of byte-by-byte magic checks
dylan-conway May 22, 2026
3c255df
Use WTF::Lock/HashMap/NeverDestroyed and MimallocMalloc instead of st…
dylan-conway May 22, 2026
c2cbbdc
Add Intl test coverage with snapshots captured from uncompressed ICU
dylan-conway May 22, 2026
b2243a0
[autofix.ci] apply automated fixes
autofix-ci[bot] May 22, 2026
ddacf76
Exhaustive Intl sweep over every ICU display-name locale via fixture …
dylan-conway May 22, 2026
ed00008
[autofix.ci] apply automated fixes
autofix-ci[bot] May 22, 2026
f123b58
Wrap state in ICUDecompressor singleton with call_once accessor
dylan-conway May 22, 2026
76fcc2b
[autofix.ci] apply automated fixes
autofix-ci[bot] May 22, 2026
d0e7449
Gate ICUDecompressor behind OS(LINUX); other platforms have unmodifie…
dylan-conway May 22, 2026
dd4abbb
Gate Intl snapshots on Linux + ICU 75.1; bump WEBKIT_VERSION to #237 …
dylan-conway May 22, 2026
43f5569
Pin to pre-#236 WebKit preview; gate Intl snapshots on Linux + ICU 75.1
dylan-conway May 22, 2026
277eca8
Review fixes: align alloc to 16, fix prebuiltDestDir cache key, guard…
dylan-conway May 23, 2026
bdd691b
Merge origin/main; bump to #237 preview 83b6a12f (post-#236)
dylan-conway May 23, 2026
4c4d076
Take main's WEBKIT_VERSION; hook is dormant until oven-sh/WebKit#237 …
dylan-conway May 23, 2026
a59a838
[autofix.ci] apply automated fixes
autofix-ci[bot] May 23, 2026
9d00194
Remove allowPreviewWebkit guard; restore #237 preview pin
dylan-conway May 23, 2026
9b70a2a
prebuiltDestDir: use full preview tag in cache key, not just first 16…
dylan-conway May 23, 2026
2900814
Bump WEBKIT_VERSION to 782504c968e2 (oven-sh/WebKit#237 merged)
dylan-conway May 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions src/jsc/bindings/bun_icu_decompress.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Per-item zstd decompression hook for ICU common data.
//
// oven-sh/WebKit's ICU build (icu/udata-decompress-hook.patch) inserts a weak
// call to bun_icu_maybe_decompress between TOC lookup and checkDataItem.
// Display-name items (curr/ lang/ region/ unit/ zone/, non-en) are stored as
// raw zstd frames; everything else keeps its 0xda27 header and passes through
// after one u32 compare. Decompressed buffers are cached for the process
// lifetime, keyed by the .rodata address of the compressed item they came from.
//
// The dict symbols are emitted by the repacked libicudata.a; declaring them
// weak here lets this file link against a prebuilt that predates the repack
// (the hook is then never called, since no item is compressed).

#include "root.h"

// The repacked libicudata.a (and the patched udata.cpp that calls this hook)
// are produced by oven-sh/WebKit's Dockerfile / Dockerfile.musl only. On every
// other platform ICU is unmodified, so there is nothing to decompress and the
// weak externs below have no definer — gate the whole implementation to keep
// non-ELF weak-symbol semantics out of the picture.
#if OS(LINUX)

#include "MimallocWTFMalloc.h"

#include <wtf/HashMap.h>
#include <wtf/Lock.h>
#include <wtf/NeverDestroyed.h>

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static_assert(ZSTD_MAGICNUMBER == 0xFD2FB528);
// Raw ICU items have bytes[2..3] == {0xda, 0x27} (ucmndata.h MAGIC1/MAGIC2),
// so their first u32 is 0x27da'hhhh — cannot collide with zstd's magic.

extern "C" __attribute__((weak)) const unsigned char bun_icu_zstd_dict[];
extern "C" __attribute__((weak)) const unsigned int bun_icu_zstd_dict_size;

namespace Bun {

class ICUDecompressor {
public:
static ICUDecompressor& get()
{
static LazyNeverDestroyed<ICUDecompressor> instance;
static std::once_flag once;
std::call_once(once, [] { instance.construct(); });
return instance.get();
}

const void* decompress(const void* p, int32_t* length)
{
Locker locker { m_lock };

if (auto it = m_cache.find(p); it != m_cache.end()) {
*length = static_cast<int32_t>(ZSTD_getFrameContentSize(p, frameBound(*length)));
return it->value;
}

size_t clen = ZSTD_findFrameCompressedSize(p, frameBound(*length));
if (ZSTD_isError(clen))
return p;
auto dlen = ZSTD_getFrameContentSize(p, clen);
if (dlen == ZSTD_CONTENTSIZE_UNKNOWN || dlen == ZSTD_CONTENTSIZE_ERROR)
return p;

void* buf = MimallocMalloc::tryAlignedMalloc(static_cast<size_t>(dlen), 16);

Check failure on line 67 in src/jsc/bindings/bun_icu_decompress.cpp

View check run for this annotation

Claude / Claude Code Review

tryAlignedMalloc debug ASSERT fires when dlen is not 16-aligned

`MimallocMalloc::tryAlignedMalloc(dlen, 16)` will trip the debug `ASSERT(((alignment - 1) & size) == 0)` at MimallocWTFMalloc.h:82 whenever the decompressed item size isn't a multiple of 16 — ICU `.res` items are only 4-byte-aligned, and the zstd frame's content size is the raw item length. The PR's test matrix lists baseline / compressed-release / compressed-LTO but not compressed-debug, so this path hasn't been exercised with `ASSERT_ENABLED`. Round the allocation size up, e.g. `tryAlignedMall
Comment thread
claude[bot] marked this conversation as resolved.
Outdated
if (!buf)
return p;
size_t r = m_ddict
? ZSTD_decompress_usingDDict(m_dctx, buf, static_cast<size_t>(dlen), p, clen, m_ddict)
: ZSTD_decompressDCtx(m_dctx, buf, static_cast<size_t>(dlen), p, clen);
if (ZSTD_isError(r)) {
MimallocMalloc::free(buf);
return p;
}

m_cache.add(p, buf);
*length = static_cast<int32_t>(dlen);
return buf;
}

private:
ICUDecompressor()
: m_dctx(ZSTD_createDCtx())
, m_ddict(&bun_icu_zstd_dict_size && bun_icu_zstd_dict_size
? ZSTD_createDDict_byReference(bun_icu_zstd_dict, bun_icu_zstd_dict_size)
: nullptr)
{
}

static size_t frameBound(int32_t tocLength) { return tocLength > 0 ? static_cast<size_t>(tocLength) : (1u << 20); }

friend class WTF::LazyNeverDestroyed<ICUDecompressor>;

WTF::Lock m_lock;
WTF::HashMap<const void*, void*> m_cache WTF_GUARDED_BY_LOCK(m_lock);
ZSTD_DCtx* const m_dctx;
ZSTD_DDict* const m_ddict;
};

} // namespace Bun

// Hook entry point: if `p` begins with the zstd frame magic, hand it to the
// ICUDecompressor singleton; otherwise (including a null `p`) return `p` as-is.
extern "C" const void* bun_icu_maybe_decompress(const void* p, int32_t* length)
{
    if (p) {
        // memcpy rather than a pointer cast: `p` carries no alignment guarantee here.
        uint32_t leadingWord;
        std::memcpy(&leadingWord, p, sizeof(leadingWord));
        if (leadingWord == ZSTD_MAGICNUMBER) [[unlikely]]
            return Bun::ICUDecompressor::get().decompress(p, length);
    }
    return p;
}

#endif // OS(LINUX)
Loading
Loading