-
Notifications
You must be signed in to change notification settings - Fork 4.7k
ICU: per-item zstd decompression hook for compressed libicudata #31200
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 11 commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
978d8ff
Add ICU per-item zstd decompression hook for compressed libicudata
dylan-conway c7e3b99
[autofix.ci] apply automated fixes
autofix-ci[bot] 1a9a633
Use ZSTD_MAGICNUMBER constant instead of byte-by-byte magic checks
dylan-conway 3c255df
Use WTF::Lock/HashMap/NeverDestroyed and MimallocMalloc instead of st…
dylan-conway c2cbbdc
Add Intl test coverage with snapshots captured from uncompressed ICU
dylan-conway b2243a0
[autofix.ci] apply automated fixes
autofix-ci[bot] ddacf76
Exhaustive Intl sweep over every ICU display-name locale via fixture …
dylan-conway ed00008
[autofix.ci] apply automated fixes
autofix-ci[bot] f123b58
Wrap state in ICUDecompressor singleton with call_once accessor
dylan-conway 76fcc2b
[autofix.ci] apply automated fixes
autofix-ci[bot] d0e7449
Gate ICUDecompressor behind OS(LINUX); other platforms have unmodifie…
dylan-conway dd4abbb
Gate Intl snapshots on Linux + ICU 75.1; bump WEBKIT_VERSION to #237 …
dylan-conway 43f5569
Pin to pre-#236 WebKit preview; gate Intl snapshots on Linux + ICU 75.1
dylan-conway 277eca8
Review fixes: align alloc to 16, fix prebuiltDestDir cache key, guard…
dylan-conway bdd691b
Merge origin/main; bump to #237 preview 83b6a12f (post-#236)
dylan-conway 4c4d076
Take main's WEBKIT_VERSION; hook is dormant until oven-sh/WebKit#237 …
dylan-conway a59a838
[autofix.ci] apply automated fixes
autofix-ci[bot] 9d00194
Remove allowPreviewWebkit guard; restore #237 preview pin
dylan-conway 9b70a2a
prebuiltDestDir: use full preview tag in cache key, not just first 16…
dylan-conway 2900814
Bump WEBKIT_VERSION to 782504c968e2 (oven-sh/WebKit#237 merged)
dylan-conway File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| // Per-item zstd decompression hook for ICU common data. | ||
| // | ||
| // oven-sh/WebKit's ICU build (icu/udata-decompress-hook.patch) inserts a weak | ||
| // call to bun_icu_maybe_decompress between TOC lookup and checkDataItem. | ||
| // Display-name items (curr/ lang/ region/ unit/ zone/, non-en) are stored as | ||
| // raw zstd frames; everything else keeps its 0xda27 header and passes through | ||
| // after one u32 compare. Decompressed buffers are cached for the process | ||
| // lifetime, keyed by their .rodata address. | ||
| // | ||
| // The dict symbols are emitted by the repacked libicudata.a; declaring them | ||
| // weak here lets this file link against a prebuilt that predates the repack | ||
| // (the hook is then never called, since no item is compressed). | ||
|
|
||
| #include "root.h" | ||
|
|
||
| // The repacked libicudata.a (and the patched udata.cpp that calls this hook) | ||
| // are produced by oven-sh/WebKit's Dockerfile / Dockerfile.musl only. On every | ||
| // other platform ICU is unmodified, so there is nothing to decompress and the | ||
| // weak externs below have no definer — gate the whole implementation to keep | ||
| // non-ELF weak-symbol semantics out of the picture. | ||
| #if OS(LINUX) | ||
|
|
||
| #include "MimallocWTFMalloc.h" | ||
|
|
||
| #include <wtf/HashMap.h> | ||
| #include <wtf/Lock.h> | ||
| #include <wtf/NeverDestroyed.h> | ||
|
|
||
| #define ZSTD_STATIC_LINKING_ONLY | ||
| #include <zstd.h> | ||
|
|
||
| static_assert(ZSTD_MAGICNUMBER == 0xFD2FB528); | ||
| // Raw ICU items have bytes[2..3] == {0xda, 0x27} (ucmndata.h MAGIC1/MAGIC2), | ||
| // so their first u32 is 0x27da'hhhh — cannot collide with zstd's magic. | ||
|
|
||
| extern "C" __attribute__((weak)) const unsigned char bun_icu_zstd_dict[]; | ||
| extern "C" __attribute__((weak)) const unsigned int bun_icu_zstd_dict_size; | ||
|
|
||
| namespace Bun { | ||
|
|
||
| class ICUDecompressor { | ||
| public: | ||
| static ICUDecompressor& get() | ||
| { | ||
| static LazyNeverDestroyed<ICUDecompressor> instance; | ||
| static std::once_flag once; | ||
| std::call_once(once, [] { instance.construct(); }); | ||
| return instance.get(); | ||
| } | ||
|
|
||
| const void* decompress(const void* p, int32_t* length) | ||
| { | ||
| Locker locker { m_lock }; | ||
|
|
||
| if (auto it = m_cache.find(p); it != m_cache.end()) { | ||
| *length = static_cast<int32_t>(ZSTD_getFrameContentSize(p, frameBound(*length))); | ||
| return it->value; | ||
| } | ||
|
|
||
| size_t clen = ZSTD_findFrameCompressedSize(p, frameBound(*length)); | ||
| if (ZSTD_isError(clen)) | ||
| return p; | ||
| auto dlen = ZSTD_getFrameContentSize(p, clen); | ||
| if (dlen == ZSTD_CONTENTSIZE_UNKNOWN || dlen == ZSTD_CONTENTSIZE_ERROR) | ||
| return p; | ||
|
|
||
| void* buf = MimallocMalloc::tryAlignedMalloc(static_cast<size_t>(dlen), 16); | ||
|
Check failure on line 67 in src/jsc/bindings/bun_icu_decompress.cpp
|
||
| if (!buf) | ||
| return p; | ||
| size_t r = m_ddict | ||
| ? ZSTD_decompress_usingDDict(m_dctx, buf, static_cast<size_t>(dlen), p, clen, m_ddict) | ||
| : ZSTD_decompressDCtx(m_dctx, buf, static_cast<size_t>(dlen), p, clen); | ||
| if (ZSTD_isError(r)) { | ||
| MimallocMalloc::free(buf); | ||
| return p; | ||
| } | ||
|
|
||
| m_cache.add(p, buf); | ||
| *length = static_cast<int32_t>(dlen); | ||
| return buf; | ||
| } | ||
|
|
||
| private: | ||
| ICUDecompressor() | ||
| : m_dctx(ZSTD_createDCtx()) | ||
| , m_ddict(&bun_icu_zstd_dict_size && bun_icu_zstd_dict_size | ||
| ? ZSTD_createDDict_byReference(bun_icu_zstd_dict, bun_icu_zstd_dict_size) | ||
| : nullptr) | ||
| { | ||
| } | ||
|
|
||
| static size_t frameBound(int32_t tocLength) { return tocLength > 0 ? static_cast<size_t>(tocLength) : (1u << 20); } | ||
|
|
||
| friend class WTF::LazyNeverDestroyed<ICUDecompressor>; | ||
|
|
||
| WTF::Lock m_lock; | ||
| WTF::HashMap<const void*, void*> m_cache WTF_GUARDED_BY_LOCK(m_lock); | ||
| ZSTD_DCtx* const m_dctx; | ||
| ZSTD_DDict* const m_ddict; | ||
| }; | ||
|
|
||
| } // namespace Bun | ||
|
|
||
| extern "C" const void* bun_icu_maybe_decompress(const void* p, int32_t* length) | ||
| { | ||
| if (!p) | ||
| return p; | ||
| uint32_t magic; | ||
| std::memcpy(&magic, p, sizeof(magic)); | ||
| if (magic != ZSTD_MAGICNUMBER) [[likely]] | ||
| return p; | ||
| return Bun::ICUDecompressor::get().decompress(p, length); | ||
| } | ||
|
|
||
| #endif // OS(LINUX) | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.