diff --git a/velox/CMakeLists.txt b/velox/CMakeLists.txt index 45d6a454054..b63feee4517 100644 --- a/velox/CMakeLists.txt +++ b/velox/CMakeLists.txt @@ -26,6 +26,7 @@ add_subdirectory(external/date) add_subdirectory(external/tzdb) add_subdirectory(external/md5) add_subdirectory(external/hdfs) +add_subdirectory(external/theta) # # examples depend on expression diff --git a/velox/external/theta/BinomialBounds.h b/velox/external/theta/BinomialBounds.h new file mode 100644 index 00000000000..53c561552eb --- /dev/null +++ b/velox/external/theta/BinomialBounds.h @@ -0,0 +1,1032 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "velox/common/base/Exceptions.h" +#include +#include +#include + +/* + * This class enables the estimation of error bounds given a sample set size, + * the sampling probability theta, the number of standard deviations and a + * simple noDataSeen flag. This can be used to estimate error bounds for fixed + * threshold sampling as well as the error bounds calculations for sketches. 
+ * + * author Alexander Saydakov + * author Lee Rhodes + * author Kevin Lang + */ + +namespace facebook::velox::common::theta { + +static constexpr double deltaOfNumStdDevs[] = { + 0.5000000000000000000, // not actually using this value + 0.1586553191586026479, + 0.0227502618904135701, + 0.0013498126861731796}; + +static constexpr double lbEquivTable[] = { + 1.0, + 2.0, + 3.0, // fake values for k = 0 + 0.78733703534118149, + 3.14426768537558132, + 13.56789685109913535, // k = 1 + 0.94091379266077979, + 2.64699271711145911, + 6.29302733018320737, // k = 2 + 0.96869128474958188, + 2.46531676590527127, + 4.97375283467403051, // k = 3 + 0.97933572521046131, + 2.37418810664669877, + 4.44899975481712318, // k = 4 + 0.98479165917274258, + 2.31863116255024693, + 4.16712379778553554, // k = 5 + 0.98806033915698777, + 2.28075536565225434, + 3.99010556144099837, // k = 6 + 0.99021896790580399, + 2.25302005857281529, + 3.86784477136922078, // k = 7 + 0.99174267079089873, + 2.23168103978522936, + 3.77784896945266269, // k = 8 + 0.99287147837287648, + 2.21465899260871879, + 3.70851932988722410, // k = 9 + 0.99373900046805375, + 2.20070155496262032, + 3.65326029076638292, // k = 10 + 0.99442519013851438, + 2.18900651202670815, + 3.60803817612955413, // k = 11 + 0.99498066823221620, + 2.17903457780744247, + 3.57024330407946877, // k = 12 + 0.99543899410224412, + 2.17040883161922693, + 3.53810982030634591, // k = 13 + 0.99582322541263579, + 2.16285726913676513, + 3.51039837124298515, // k = 14 + 0.99614973311747690, + 2.15617827879603396, + 3.48621230377099778, // k = 15 + 0.99643042892560629, + 2.15021897666090922, + 3.46488605693562590, // k = 16 + 0.99667418783778317, + 2.14486114872480016, + 3.44591466064832730, // k = 17 + 0.99688774875812669, + 2.14001181420209718, + 3.42890765690452781, // k = 18 + 0.99707632299691795, + 2.13559675336844634, + 3.41355809420343803, // k = 19 + 0.99724399084971083, + 2.13155592217421486, + 3.39962113251016262, // k = 20 + 
0.99739400151915447, + 2.12784018863251845, + 3.38689892877548004, // k = 21 + 0.99752896842633731, + 2.12440890875851096, + 3.37522975271599535, // k = 22 + 0.99765101725122918, + 2.12122815311133195, + 3.36448003577621080, // k = 23 + 0.99776189496810730, + 2.11826934724291505, + 3.35453840911279144, // k = 24 + 0.99786304821586214, + 2.11550823850916458, + 3.34531123809287578, // k = 25 + 0.99795568665180667, + 2.11292409529477254, + 3.33671916527694634, // k = 26 + 0.99804083063483517, + 2.11049908609763293, + 3.32869446834217797, // k = 27 + 0.99811933910984862, + 2.10821776918189130, + 3.32117898316676019, // k = 28 + 0.99819195457286014, + 2.10606671027090897, + 3.31412243534683171, // k = 29 + 0.99825930555178388, + 2.10403415237001923, + 3.30748113008135647, // k = 30 + 0.99832193858154028, + 2.10210975877822648, + 3.30121691946897045, // k = 31 + 0.99838032666573895, + 2.10028440670842542, + 3.29529629751144171, // k = 32 + 0.99843488390555990, + 2.09855000145353188, + 3.28968974413223236, // k = 33 + 0.99848596721417948, + 2.09689934193824001, + 3.28437111460505093, // k = 34 + 0.99853390005924325, + 2.09532599155502908, + 3.27931717312372939, // k = 35 + 0.99857895741078551, + 2.09382418262592296, + 3.27450718840060517, // k = 36 + 0.99862138880970974, + 2.09238872751677718, + 3.26992261182860489, // k = 37 + 0.99866141580770318, + 2.09101494715108061, + 3.26554677962434425, // k = 38 + 0.99869923565267982, + 2.08969860402822860, + 3.26136468165239535, // k = 39 + 0.99873502010169091, + 2.08843585627218431, + 3.25736275677081721, // k = 40 + 0.99876893292508839, + 2.08722321436752623, + 3.25352872241415980, // k = 41 + 0.99880111078502409, + 2.08605749165553789, + 3.24985141664350863, // k = 42 + 0.99883168573342118, + 2.08493577529222307, + 3.24632068399498053, // k = 43 + 0.99886077231613513, + 2.08385540129560809, + 3.24292724848112357, // k = 44 + 0.99888847451828155, + 2.08281392374021834, + 3.23966263299664092, // k = 45 + 0.99891488795844907, + 
2.08180908991394631, + 3.23651906111521726, // k = 46 + 0.99894010085196783, + 2.08083882998420222, + 3.23348939240611344, // k = 47 + 0.99896419358239541, + 2.07990122528650545, + 3.23056705515594444, // k = 48 + 0.99898723510594323, + 2.07899450946285924, + 3.22774598963252402, // k = 49 + 0.99900929266780736, + 2.07811704477046533, + 3.22502059972006805, // k = 50 + 0.99903043086155208, + 2.07726730587160091, + 3.22238570890294795, // k = 51 + 0.99905070073845081, + 2.07644388314946582, + 3.21983651940365689, // k = 52 + 0.99907015770423868, + 2.07564546080757850, + 3.21736857351049821, // k = 53 + 0.99908884779227947, + 2.07487081196367740, + 3.21497773796417619, // k = 54 + 0.99910681586905525, + 2.07411879634256024, + 3.21266015316183484, // k = 55 + 0.99912410177549305, + 2.07338834403498140, + 3.21041222805715165, // k = 56 + 0.99914074347179849, + 2.07267845454973099, + 3.20823061166797174, // k = 57 + 0.99915677607464204, + 2.07198819052374006, + 3.20611216970604573, // k = 58 + 0.99917223149395795, + 2.07131667846186929, + 3.20405396962596001, // k = 59 + 0.99918714153457699, + 2.07066309019154460, + 3.20205326110445299, // k = 60 + 0.99920153247185794, + 2.07002665203046377, + 3.20010746990493544, // k = 61 + 0.99921543193525508, + 2.06940663431663552, + 3.19821417453343315, // k = 62 + 0.99922886570365677, + 2.06880235245998279, + 3.19637109973109546, // k = 63 + 0.99924185357357942, + 2.06821315729285971, + 3.19457610621114441, // k = 64 + 0.99925441845175555, + 2.06763843812092318, + 3.19282717869864996, // k = 65 + 0.99926658263325407, + 2.06707761824370095, + 3.19112241228646099, // k = 66 + 0.99927836173816331, + 2.06653015295219689, + 3.18946001739936946, // k = 67 + 0.99928977431994781, + 2.06599552505539918, + 3.18783829446098821, // k = 68 + 0.99930083753795884, + 2.06547324585920933, + 3.18625564538041317, // k = 69 + 0.99931156864562354, + 2.06496285191821016, + 3.18471055124089730, // k = 70 + 0.99932197985521043, + 2.06446390392778767, + 
3.18320157510865442, // k = 71 + 0.99933208559809827, + 2.06397598606787369, + 3.18172735837393361, // k = 72 + 0.99934190032416836, + 2.06349869971447220, + 3.18028661102792398, // k = 73 + 0.99935143390791836, + 2.06303166975550312, + 3.17887810481605015, // k = 74 + 0.99936070171270330, + 2.06257453607466346, + 3.17750067581857820, // k = 75 + 0.99936971103502970, + 2.06212696042919674, + 3.17615321728274580, // k = 76 + 0.99937847392385493, + 2.06168861430600714, + 3.17483467831510779, // k = 77 + 0.99938700168914352, + 2.06125918927764928, + 3.17354405480557489, // k = 78 + 0.99939530099953799, + 2.06083838987589729, + 3.17228039269048168, // k = 79 + 0.99940338278830154, + 2.06042593411496000, + 3.17104278166036124, // k = 80 + 0.99941125463777780, + 2.06002155276328835, + 3.16983035274597569, // k = 81 + 0.99941892470027938, + 2.05962498741951094, + 3.16864227952240185, // k = 82 + 0.99942640059737187, + 2.05923599161263837, + 3.16747776846497686, // k = 83 + 0.99943368842187397, + 2.05885433061945378, + 3.16633606416374391, // k = 84 + 0.99944079790603269, + 2.05847977868873500, + 3.16521644518826406, // k = 85 + 0.99944773295734990, + 2.05811212058944193, + 3.16411821883858124, // k = 86 + 0.99945450059186669, + 2.05775114781260982, + 3.16304072400711789, // k = 87 + 0.99946110646314423, + 2.05739666442039493, + 3.16198332650733960, // k = 88 + 0.99946755770463369, + 2.05704847678819647, + 3.16094541781455973, // k = 89 + 0.99947385746861528, + 2.05670640500335367, + 3.15992641851471490, // k = 90 + 0.99948001256305474, + 2.05637027420314666, + 3.15892576988736096, // k = 91 + 0.99948602689656241, + 2.05603991286400856, + 3.15794293484717059, // k = 92 + 0.99949190674294641, + 2.05571516158917689, + 3.15697740043813724, // k = 93 + 0.99949765436329585, + 2.05539586490317561, + 3.15602867309343083, // k = 94 + 0.99950327557880314, + 2.05508187237845164, + 3.15509627710042651, // k = 95 + 0.99950877461972709, + 2.05477304104951486, + 3.15417975753007340, // 
k = 96 + 0.99951415481862682, + 2.05446923022574879, + 3.15327867462917766, // k = 97 + 0.99951942042375208, + 2.05417030908833453, + 3.15239260700215596, // k = 98 + 0.99952457390890004, + 2.05387614661762541, + 3.15152114915238712, // k = 99 + 0.99952962005008317, + 2.05358662050909402, + 3.15066390921020911, // k = 100 + 0.99953456216121594, + 2.05330161104427589, + 3.14982051097524618, // k = 101 + 0.99953940176368405, + 2.05302100378725072, + 3.14899059183684926, // k = 102 + 0.99954414373920031, + 2.05274468493067275, + 3.14817379948561893, // k = 103 + 0.99954879047621148, + 2.05247255013657082, + 3.14736979964868624, // k = 104 + 0.99955334485656522, + 2.05220449388099269, + 3.14657826610371671, // k = 105 + 0.99955780993869325, + 2.05194041831310869, + 3.14579888316276879, // k = 106 + 0.99956218652590678, + 2.05168022402710903, + 3.14503134811607765, // k = 107 + 0.99956647932785359, + 2.05142381889103831, + 3.14427536967733090, // k = 108 + 0.99957069025060719, + 2.05117111251445294, + 3.14353066260227365, // k = 109 + 0.99957482032178291, + 2.05092201793428330, + 3.14279695558593630, // k = 110 + 0.99957887261450651, + 2.05067645094720774, + 3.14207398336887422, // k = 111 + 0.99958284988383639, + 2.05043432833224415, + 3.14136149076028914, // k = 112 + 0.99958675435604505, + 2.05019557189746138, + 3.14065923143530767, // k = 113 + 0.99959058650074439, + 2.04996010556124020, + 3.13996696426707445, // k = 114 + 0.99959434898201494, + 2.04972785368377686, + 3.13928445867830419, // k = 115 + 0.99959804437042976, + 2.04949874512311681, + 3.13861149103462367, // k = 116 + 0.99960167394553423, + 2.04927271043337100, + 3.13794784369528656, // k = 117 + 0.99960523957651048, + 2.04904968140490951, + 3.13729330661277572, // k = 118 + 0.99960874253329735, + 2.04882959397491504, + 3.13664767767019725, // k = 119 + 0.99961218434327748, + 2.04861238220240693, + 3.13601075688413289 // k = 120 +}; + +static constexpr double ubEquivTable[] = { + 1.0, + 2.0, + 3.0, // 
fake values for k = 0 + 0.99067760836669549, + 1.75460517119302040, + 2.48055626001627161, // k = 1 + 0.99270518097577565, + 1.78855957509907171, + 2.53863835259832626, // k = 2 + 0.99402032633599902, + 1.81047286499563143, + 2.57811676180597260, // k = 3 + 0.99492607629539975, + 1.82625928017762362, + 2.60759550546498531, // k = 4 + 0.99558653966013821, + 1.83839160339161367, + 2.63086812358551470, // k = 5 + 0.99608981951632813, + 1.84812399034444752, + 2.64993712523727254, // k = 6 + 0.99648648035983456, + 1.85617372053235385, + 2.66598485907860550, // k = 7 + 0.99680750790483330, + 1.86298655802610824, + 2.67976541374471822, // k = 8 + 0.99707292880049181, + 1.86885682585270274, + 2.69178781407745760, // k = 9 + 0.99729614928489241, + 1.87398826101983218, + 2.70241106542158604, // k = 10 + 0.99748667952445658, + 1.87852708449801753, + 2.71189717290596377, // k = 11 + 0.99765127712748836, + 1.88258159501103250, + 2.72044290303773550, // k = 12 + 0.99779498340305395, + 1.88623391878036273, + 2.72819957382063194, // k = 13 + 0.99792160418357412, + 1.88954778748873764, + 2.73528576807902368, // k = 14 + 0.99803398604944960, + 1.89257337682371940, + 2.74179612106766513, // k = 15 + 0.99813449883217231, + 1.89535099316557876, + 2.74780718300419835, // k = 16 + 0.99822494122659577, + 1.89791339232732525, + 2.75338173141955167, // k = 17 + 0.99830679915913834, + 1.90028752122407241, + 2.75857186416826039, // k = 18 + 0.99838117410831728, + 1.90249575897183831, + 2.76342117562634826, // k = 19 + 0.99844913407071090, + 1.90455689090418900, + 2.76796659454200267, // k = 20 + 0.99851147736424650, + 1.90648682834171268, + 2.77223944710058845, // k = 21 + 0.99856879856019987, + 1.90829917277082473, + 2.77626682032629901, // k = 22 + 0.99862183849734265, + 1.91000561415842185, + 2.78007199816156003, // k = 23 + 0.99867096266018507, + 1.91161621560812023, + 2.78367524259661536, // k = 24 + 0.99871656986212543, + 1.91313978579765376, + 2.78709435016625662, // k = 25 + 
0.99875907577771272, + 1.91458400425526065, + 2.79034488416175463, // k = 26 + 0.99879885565047744, + 1.91595563175945927, + 2.79344064132371273, // k = 27 + 0.99883610756373287, + 1.91726064301425936, + 2.79639384757751941, // k = 28 + 0.99887095169674467, + 1.91850441099725799, + 2.79921543574803877, // k = 29 + 0.99890379414739527, + 1.91969155477030995, + 2.80191513182441554, // k = 30 + 0.99893466279047516, + 1.92082633358913313, + 2.80450167352080371, // k = 31 + 0.99896392088177777, + 1.92191254955568525, + 2.80698295731653502, // k = 32 + 0.99899147889385631, + 1.92295362479495680, + 2.80936614404217266, // k = 33 + 0.99901764688726757, + 1.92395267400968351, + 2.81165765979318394, // k = 34 + 0.99904238606342233, + 1.92491244978191389, + 2.81386337393604435, // k = 35 + 0.99906590152386343, + 1.92583552644848055, + 2.81598868034527072, // k = 36 + 0.99908829040739988, + 1.92672418013918900, + 2.81803841726804194, // k = 37 + 0.99910959420023460, + 1.92758051694144683, + 2.82001709302821268, // k = 38 + 0.99912996403594434, + 1.92840654943159961, + 2.82192875763732332, // k = 39 + 0.99914930224576892, + 1.92920397044028391, + 2.82377730628954282, // k = 40 + 0.99916781270195543, + 1.92997447498220254, + 2.82556612075063640, // k = 41 + 0.99918553179077207, + 1.93071949211818605, + 2.82729843191989971, // k = 42 + 0.99920250730914972, + 1.93144048613876862, + 2.82897728689417249, // k = 43 + 0.99921873345181211, + 1.93213870990595638, + 2.83060537017752267, // k = 44 + 0.99923435180002684, + 1.93281536508689555, + 2.83218527795750674, // k = 45 + 0.99924930425362390, + 1.93347145882316340, + 2.83371938965598247, // k = 46 + 0.99926370394567243, + 1.93410820221384938, + 2.83520990872793277, // k = 47 + 0.99927750755296074, + 1.93472643138986200, + 2.83665891945119597, // k = 48 + 0.99929082941537217, + 1.93532697329771963, + 2.83806833931606661, // k = 49 + 0.99930366295501472, + 1.93591074716263734, + 2.83943997143404658, // k = 50 + 0.99931598804721489, + 
1.93647857274021362, + 2.84077557836653227, // k = 51 + 0.99932789059798210, + 1.93703110239354714, + 2.84207662106302905, // k = 52 + 0.99933946180485123, + 1.93756904936378760, + 2.84334468086129277, // k = 53 + 0.99935053819703512, + 1.93809302131219852, + 2.84458116874117195, // k = 54 + 0.99936126637970801, + 1.93860365411038060, + 2.84578731838604426, // k = 55 + 0.99937166229284458, + 1.93910149816429112, + 2.84696443486512862, // k = 56 + 0.99938169190727422, + 1.93958709548454067, + 2.84811369085281285, // k = 57 + 0.99939136927613959, + 1.94006085573701625, + 2.84923617230361970, // k = 58 + 0.99940074328745254, + 1.94052339623206649, + 2.85033291216254270, // k = 59 + 0.99940993070470086, + 1.94097508636855309, + 2.85140492437699322, // k = 60 + 0.99941868577388959, + 1.94141633372043998, + 2.85245314430358121, // k = 61 + 0.99942734443487780, + 1.94184757038001976, + 2.85347839582286156, // k = 62 + 0.99943556385736088, + 1.94226915100517772, + 2.85448160365493209, // k = 63 + 0.99944374522542034, + 1.94268143723749631, + 2.85546346373061510, // k = 64 + 0.99945159955424856, + 1.94308482059116727, + 2.85642486111805738, // k = 65 + 0.99945915301904620, + 1.94347956957849988, + 2.85736639994965458, // k = 66 + 0.99946660663832176, + 1.94386600964031686, + 2.85828887832701639, // k = 67 + 0.99947383703224091, + 1.94424436597356021, + 2.85919278275500233, // k = 68 + 0.99948075442870277, + 1.94461502153473020, + 2.86007887186090670, // k = 69 + 0.99948766082269458, + 1.94497821937304138, + 2.86094774077355396, // k = 70 + 0.99949422748713346, + 1.94533411296001191, + 2.86179981848076181, // k = 71 + 0.99950070756119658, + 1.94568300035135167, + 2.86263579405672886, // k = 72 + 0.99950704321753392, + 1.94602523449961495, + 2.86345610449197352, // k = 73 + 0.99951320334216121, + 1.94636083782822311, + 2.86426125541271404, // k = 74 + 0.99951920293474927, + 1.94669011080745236, + 2.86505169255406145, // k = 75 + 0.99952501670378524, + 1.94701327348536779, + 
2.86582788270862920, // k = 76 + 0.99953071209267819, + 1.94733044372333097, + 2.86659027602854621, // k = 77 + 0.99953632734991515, + 1.94764180764266825, + 2.86733927778843167, // k = 78 + 0.99954171164873173, + 1.94794766430732125, + 2.86807526143834934, // k = 79 + 0.99954699274462655, + 1.94824807472994621, + 2.86879864789403882, // k = 80 + 0.99955216611081710, + 1.94854317889829076, + 2.86950970901679625, // k = 81 + 0.99955730019613043, + 1.94883320227168610, + 2.87020887436986527, // k = 82 + 0.99956213770650493, + 1.94911826561721568, + 2.87089648477021342, // k = 83 + 0.99956704264963037, + 1.94939848545763539, + 2.87157281693902178, // k = 84 + 0.99957166306481327, + 1.94967401618316671, + 2.87223821840905202, // k = 85 + 0.99957632713136491, + 1.94994497791333288, + 2.87289293193450135, // k = 86 + 0.99958087233392234, + 1.95021155752212394, + 2.87353731228213860, // k = 87 + 0.99958532555996271, + 1.95047376805584349, + 2.87417154907075201, // k = 88 + 0.99958956246481989, + 1.95073180380688882, + 2.87479599765507032, // k = 89 + 0.99959389351869277, + 1.95098572880579013, + 2.87541081987382086, // k = 90 + 0.99959807862052230, + 1.95123574036898617, + 2.87601637401948551, // k = 91 + 0.99960214057801977, + 1.95148186921983324, + 2.87661283691068093, // k = 92 + 0.99960607527256684, + 1.95172415829728152, + 2.87720042968334155, // k = 93 + 0.99960996433179616, + 1.95196280898670693, + 2.87777936649376898, // k = 94 + 0.99961379137860717, + 1.95219787713926962, + 2.87834989933620022, // k = 95 + 0.99961756088146103, + 1.95242944583677058, + 2.87891216133900230, // k = 96 + 0.99962125605327401, + 1.95265762420910960, + 2.87946647367488140, // k = 97 + 0.99962486179100551, + 1.95288245314810638, + 2.88001290210658567, // k = 98 + 0.99962843240297161, + 1.95310404286672679, + 2.88055166523392359, // k = 99 + 0.99963187276145504, + 1.95332251980147475, + 2.88108300006589957, // k = 100 + 0.99963525453173929, + 1.95353785898848287, + 2.88160703591438505, // 
k = 101 + 0.99963855412988778, + 1.95375019354571577, + 2.88212393551896184, // k = 102 + 0.99964190254169694, + 1.95395953472205974, + 2.88263389761985422, // k = 103 + 0.99964506565942202, + 1.95416607430155409, + 2.88313700661564098, // k = 104 + 0.99964834424233118, + 1.95436972855640079, + 2.88363350163803034, // k = 105 + 0.99965136548857458, + 1.95457068540693513, + 2.88412349413960101, // k = 106 + 0.99965436594726498, + 1.95476896383092935, + 2.88460710620208260, // k = 107 + 0.99965736463468602, + 1.95496457504532373, + 2.88508450078833789, // k = 108 + 0.99966034130443404, + 1.95515761150707590, + 2.88555580586194083, // k = 109 + 0.99966326130828520, + 1.95534810382198998, + 2.88602118761679094, // k = 110 + 0.99966601446035952, + 1.95553622237747504, + 2.88648066384146773, // k = 111 + 0.99966887679593697, + 1.95572186728168163, + 2.88693444915907094, // k = 112 + 0.99967161286551232, + 1.95590523410490391, + 2.88738271495714116, // k = 113 + 0.99967435412270333, + 1.95608626483223702, + 2.88782540459769166, // k = 114 + 0.99967701261934394, + 1.95626497627117146, + 2.88826277189363623, // k = 115 + 0.99967963265157778, + 1.95644153684824573, + 2.88869486674335008, // k = 116 + 0.99968216317182623, + 1.95661589936000269, + 2.88912184353694101, // k = 117 + 0.99968479674396349, + 1.95678821614791332, + 2.88954376359643561, // k = 118 + 0.99968729031337489, + 1.95695842061650183, + 2.88996069422501023, // k = 119 + 0.99968963358631413, + 1.95712651709766305, + 2.89037285320668502 // k = 120 +}; + +class BinomialBounds { + public: + static double getLowerBound( + unsigned long long numSamples, + double theta, + unsigned numStdDevs) { + checkTheta(theta); + checkNumStdDevs(numStdDevs); + const double estimate = numSamples / theta; + const double lb = + computeApproxBinomialLowerBound(numSamples, theta, numStdDevs); + return std::min(estimate, std::max(static_cast(numSamples), lb)); + } + + static double getUpperBound( + unsigned long long numSamples, + 
double theta, + unsigned numStdDevs) { + checkTheta(theta); + checkNumStdDevs(numStdDevs); + const double estimate = numSamples / theta; + const double ub = + computeApproxBinomialUpperBound(numSamples, theta, numStdDevs); + return std::max(estimate, ub); + } + + private: + // our "classic" bounds, but now with continuity correction + static double contClassicLb( + unsigned long long numSamples, + double theta, + double numStdDevs) { + const double n_hat = (numSamples - 0.5) / theta; + const double b = numStdDevs * std::sqrt((1.0 - theta) / theta); + const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat)); + const double center = n_hat + (0.5 * (b * b)); + return (center - d); + } + + // our "classic" bounds, but now with continuity correction + static double contClassicUb( + unsigned long long numSamples, + double theta, + double numStdSevs) { + const double n_hat = (numSamples + 0.5) / theta; + const double b = numStdSevs * std::sqrt((1.0 - theta) / theta); + const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat)); + const double center = n_hat + (0.5 * (b * b)); + return (center + d); + } + + // This is a special purpose calculator for NStar, using a computational + // strategy inspired by its Bayesian definition. It is only appropriate + // for a very limited set of inputs. However, the procedure + // compute_approx_binomial_lower_bound() below does in fact only call it for + // suitably limited inputs. Outside of this limited range, two different bad + // things will happen. First, because we are not using logarithms, the values + // of intermediate quantities will exceed the dynamic range of doubles. + // Second, even if that problem were fixed, the running time of this procedure + // is essentially linear in est = (numSamples / p), and that can be Very, Very + // Big. 
+ static unsigned long long + specialNStar(unsigned long long numSamples, double p, double delta) { + const double q = 1.0 - p; + // Use a different algorithm if the following is true; this one will be too + // slow, or worse. + if ((numSamples / p) >= 500.0) + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "out of range", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + double cur_term = + std::pow(p, numSamples); // curTerm = posteriorProbability (k, k, p) + if (cur_term <= 1e-100) + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "out of range", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); // sanity check for non-use of logarithms + double tot = cur_term; + unsigned long long m = numSamples; + while (tot <= delta) { // this test can fail even the first time + cur_term = (cur_term * q * (m)) / ((m + 1) - numSamples); + tot += cur_term; + m += 1; + } + // we have reached a state where tot > delta, so back up one + return (m - 1); + } + + // The following procedure has very limited applicability. + // The above remarks about special_n_star() also apply here. 
+ static unsigned long long + specialNPrimeB(unsigned long long numSamples, double p, double delta) { + const double q = 1.0 - p; + const double oneMinusDelta = 1.0 - delta; + double curTerm = + std::pow(p, numSamples); // curTerm = posteriorProbability (k, k, p) + if (curTerm <= 1e-100) + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "out of range", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); // sanity check for non-use of logarithms + double tot = curTerm; + unsigned long long m = numSamples; + while (tot < oneMinusDelta) { + curTerm = (curTerm * q * (m)) / ((m + 1) - numSamples); + tot += curTerm; + m += 1; + } + return m; // no need to back up + } + + static unsigned long long + specialNPrimeF(unsigned long long numSamples, double p, double delta) { + // Use a different algorithm if the following is true; this one will be too + // slow, or worse. + if ((numSamples / p) >= 500.0) + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "out of range", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); // A super-small delta could also make it slow. + return specialNPrimeB(numSamples + 1, p, delta); + } + + // The following computes an approximation to the lower bound of a Frequentist + // confidence interval based on the tails of the Binomial distribution. 
+ static double computeApproxBinomialLowerBound( + unsigned long long numSamples, + double theta, + unsigned numStdDevs) { + if (theta == 1) + return static_cast(numSamples); + if (numSamples == 0) + return 0; + if (numSamples == 1) { + const double delta = deltaOfNumStdDevs[numStdDevs]; + const double rawLb = std::log(1 - delta) / std::log(1 - theta); + return std::floor(rawLb); // round down + } + if (numSamples > 120) { + // plenty of samples, so gaussian approximation to binomial distribution + // isn't too bad + const double rawLb = contClassicLb(numSamples, theta, numStdDevs); + return (rawLb - 0.5); // fake round down + } + // at this point we know 2 <= num_samples <= 120 + if (theta > (1 - 1e-5)) { // empirically-determined threshold + return static_cast(numSamples); + } + if (theta < (numSamples / 360.0)) { // empirically-determined threshold + // here we use the Gaussian approximation, but with a modified + // num_std_devs + const unsigned index = + 3 * static_cast(numSamples) + (numStdDevs - 1); + const double rawLb = + contClassicLb(numSamples, theta, lbEquivTable[index]); + return rawLb - 0.5; // fake round down + } + // This is the most difficult range to approximate; we will compute an + // "exact" LB. We know that est <= 360, so specialNStar() shouldn't be + // ridiculously slow. + const double delta = deltaOfNumStdDevs[numStdDevs]; + return static_cast( + specialNStar(numSamples, theta, delta)); // no need to round + } + + // The following computes an approximation to the upper bound of a Frequentist + // confidence interval based on the tails of the Binomial distribution. 
+ static double computeApproxBinomialUpperBound( + unsigned long long numSamples, + double theta, + unsigned numStdDevs) { + if (theta == 1) + return static_cast(numSamples); + if (numSamples == 0) { + const double delta = deltaOfNumStdDevs[numStdDevs]; + const double raw_ub = std::log(delta) / std::log(1 - theta); + return std::ceil(raw_ub); // round up + } + if (numSamples > 120) { + // plenty of samples, so gaussian approximation to binomial distribution + // isn't too bad + const double raw_ub = contClassicUb(numSamples, theta, numStdDevs); + return (raw_ub + 0.5); // fake round up + } + // at this point we know 2 <= num_samples <= 120 + if (theta > (1 - 1e-5)) { // empirically-determined threshold + return static_cast(numSamples + 1); + } + if (theta < (numSamples / 360.0)) { // empirically-determined threshold + // here we use the Gaussian approximation, but with a modified + // num_std_devs + const unsigned index = + 3 * static_cast(numSamples) + (numStdDevs - 1); + const double raw_ub = + contClassicUb(numSamples, theta, ubEquivTable[index]); + return raw_ub + 0.5; // fake round up + } + // This is the most difficult range to approximate; we will compute an + // "exact" UB. We know that est <= 360, so specialNPrimeF() shouldn't be + // ridiculously slow. 
+ const double delta = deltaOfNumStdDevs[numStdDevs]; + return static_cast( + specialNPrimeF(numSamples, theta, delta)); // no need to round + } + + static void checkTheta(double theta) { + if (theta < 0 || theta > 1) { + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "theta must be in [0, 1]", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } + } + + static void checkNumStdDevs(unsigned numStdDevs) { + if (numStdDevs < 1 || numStdDevs > 3) { + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "num_std_devs must be 1, 2 or 3", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } + } +}; + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/BitPacking.h b/velox/external/theta/BitPacking.h new file mode 100644 index 00000000000..e964e1316a8 --- /dev/null +++ b/velox/external/theta/BitPacking.h @@ -0,0 +1,6552 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "velox/common/base/Exceptions.h" + +#include +#include + +namespace facebook::velox::common::theta { + +static inline uint8_t +packBits(uint64_t value, uint8_t bits, uint8_t*& ptr, uint8_t offset) { + if (offset > 0) { + const uint8_t chunkBits = 8 - offset; + const uint8_t mask = (1 << chunkBits) - 1; + if (bits < chunkBits) { + *ptr |= (value << (chunkBits - bits)) & mask; + return offset + bits; + } + *ptr++ |= (value >> (bits - chunkBits)) & mask; + bits -= chunkBits; + } + while (bits >= 8) { + *ptr++ = static_cast(value >> (bits - 8)); + bits -= 8; + } + if (bits > 0) { + *ptr = static_cast(value << (8 - bits)); + return bits; + } + return 0; +} + +static inline uint8_t +unpackBits(uint64_t& value, uint8_t bits, const uint8_t*& ptr, uint8_t offset) { + const uint8_t availBits = 8 - offset; + const uint8_t chunkBits = std::min(availBits, bits); + const uint8_t mask = (1 << chunkBits) - 1; + value = (*ptr >> (availBits - chunkBits)) & mask; + ptr += availBits == chunkBits; + offset = (offset + chunkBits) & 7; + bits -= chunkBits; + while (bits >= 8) { + value <<= 8; + value |= *ptr++; + bits -= 8; + } + if (bits > 0) { + value <<= bits; + value |= *ptr >> (8 - bits); + return bits; + } + return offset; +} + +// pack given number of bits from a block of 8 64-bit values into bytes +// we don't need 0 and 64 bits +// we assume that higher bits (which we are not packing) are zeros +// this assumption allows to avoid masking operations + +static inline void packBits1(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 7); + *ptr |= static_cast(values[1] << 6); + *ptr |= static_cast(values[2] << 5); + *ptr |= static_cast(values[3] << 4); + *ptr |= static_cast(values[4] << 3); + *ptr |= static_cast(values[5] << 2); + *ptr |= static_cast(values[6] << 1); + *ptr |= static_cast(values[7]); +} + +static inline void packBits2(const uint64_t* values, uint8_t* ptr) { + *ptr = 
static_cast(values[0] << 6); + *ptr |= static_cast(values[1] << 4); + *ptr |= static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3]); + + *ptr = static_cast(values[4] << 6); + *ptr |= static_cast(values[5] << 4); + *ptr |= static_cast(values[6] << 2); + *ptr |= static_cast(values[7]); +} + +static inline void packBits3(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 5); + *ptr |= static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr |= static_cast(values[3] << 4); + *ptr |= static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr |= static_cast(values[6] << 3); + *ptr |= static_cast(values[7]); +} + +static inline void packBits4(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1]); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3]); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5]); + + *ptr = static_cast(values[6] << 4); + *ptr |= static_cast(values[7]); +} + +static inline void packBits5(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr |= static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr |= static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr |= static_cast(values[7]); +} + +static inline void packBits6(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3]); + + 
*ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr |= static_cast(values[7]); +} + +static inline void packBits7(const uint64_t* values, uint8_t* ptr) { + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr |= static_cast(values[7]); +} + +static inline void packBits8(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0]); + *ptr++ = static_cast(values[1]); + *ptr++ = static_cast(values[2]); + *ptr++ = static_cast(values[3]); + *ptr++ = static_cast(values[4]); + *ptr++ = static_cast(values[5]); + *ptr++ = static_cast(values[6]); + *ptr = static_cast(values[7]); +} + +static inline void packBits9(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 7); + + *ptr = static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits10(const uint64_t* values, 
uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits11(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 9); + + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 10); + + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits12(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 8); + + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 8); + + *ptr++ = 
static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits13(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 10); + + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 9); + + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 11); + + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits14(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 12); + + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 10); + + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 12); + + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 10); + + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void 
packBits15(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 14); + + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 13); + + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 11); + + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 10); + + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 9); + + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits16(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits17(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 9); + + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 10); + + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 
11); + + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 13); + + *ptr++ = static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 14); + + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 15); + + *ptr++ = static_cast(values[6] >> 7); + + *ptr = static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits18(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 10); + + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 12); + + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 14); + + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 16); + + *ptr++ = static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 10); + + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 12); + + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 14); + + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits19(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 11); + + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 
14); + + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 17); + + *ptr++ = static_cast(values[2] >> 9); + + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 15); + + /* Plain store, not |=: this output byte has not been assigned yet in this function, so OR-ing would mix in whatever the destination buffer already contained. */ + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 18); + + *ptr++ = static_cast(values[5] >> 10); + + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 13); + + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits20(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 12); + + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 16); + + *ptr++ = static_cast(values[1] >> 8); + + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 12); + + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 16); + + *ptr++ = static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 12); + + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 16); + + *ptr++ = static_cast(values[5] >> 8); + + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 12); + + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void 
packBits21(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 13); + + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 18); + + *ptr++ = static_cast(values[1] >> 10); + + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 15); + + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 20); + + *ptr++ = static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 17); + + *ptr++ = static_cast(values[4] >> 9); + + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 14); + + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 19); + + *ptr++ = static_cast(values[6] >> 11); + + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits22(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 14); + + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 20); + + *ptr++ = static_cast(values[1] >> 12); + + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 18); + + *ptr++ = static_cast(values[2] >> 10); + + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 16); + + *ptr++ = static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 14); + + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= 
static_cast(values[5] >> 20); + + *ptr++ = static_cast(values[5] >> 12); + + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 18); + + *ptr++ = static_cast(values[6] >> 10); + + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits23(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 15); + + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 22); + + *ptr++ = static_cast(values[1] >> 14); + + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 21); + + *ptr++ = static_cast(values[2] >> 13); + + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 20); + + *ptr++ = static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 19); + + *ptr++ = static_cast(values[4] >> 11); + + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 18); + + *ptr++ = static_cast(values[5] >> 10); + + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 17); + + *ptr++ = static_cast(values[6] >> 9); + + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits24(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 16); + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 16); + 
*ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 16); + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 16); + *ptr++ = static_cast(values[4] >> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 16); + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits25(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 17); + + *ptr++ = static_cast(values[0] >> 9); + + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 18); + + *ptr++ = static_cast(values[1] >> 10); + + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 19); + + *ptr++ = static_cast(values[2] >> 11); + + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 20); + + *ptr++ = static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 21); + + *ptr++ = static_cast(values[4] >> 13); + + *ptr++ = static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 22); + + *ptr++ = static_cast(values[5] >> 14); + + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 23); + + *ptr++ = static_cast(values[6] >> 15); + + *ptr++ = static_cast(values[6] >> 7); + + *ptr = 
static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 24); + + *ptr++ = static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits26(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 18); + + *ptr++ = static_cast(values[0] >> 10); + + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 20); + + *ptr++ = static_cast(values[1] >> 12); + + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 22); + + *ptr++ = static_cast(values[2] >> 14); + + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 24); + + *ptr++ = static_cast(values[3] >> 16); + + *ptr++ = static_cast(values[3] >> 8); + + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 18); + + *ptr++ = static_cast(values[4] >> 10); + + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 20); + + *ptr++ = static_cast(values[5] >> 12); + + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 22); + + *ptr++ = static_cast(values[6] >> 14); + + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 24); + + *ptr++ = static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits27(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 19); + + *ptr++ = static_cast(values[0] >> 11); + + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 22); + + *ptr++ = static_cast(values[1] >> 14); + + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 
2); + *ptr++ |= static_cast(values[2] >> 25); + + *ptr++ = static_cast(values[2] >> 17); + + *ptr++ = static_cast(values[2] >> 9); + + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 20); + + *ptr++ = static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 23); + + *ptr++ = static_cast(values[4] >> 15); + + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 26); + + *ptr++ = static_cast(values[5] >> 18); + + *ptr++ = static_cast(values[5] >> 10); + + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 21); + + *ptr++ = static_cast(values[6] >> 13); + + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 24); + + *ptr++ = static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = static_cast(values[7]); +} + +static inline void packBits28(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 20); + *ptr++ = static_cast(values[0] >> 12); + *ptr++ = static_cast(values[0] >> 4); + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + *ptr++ = static_cast(values[2] >> 20); + *ptr++ = static_cast(values[2] >> 12); + *ptr++ = static_cast(values[2] >> 4); + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + *ptr++ = static_cast(values[4] >> 20); + *ptr++ = static_cast(values[4] >> 12); + *ptr++ = static_cast(values[4] >> 4); + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 24); 
+ *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + *ptr++ = static_cast(values[6] >> 20); + *ptr++ = static_cast(values[6] >> 12); + *ptr++ = static_cast(values[6] >> 4); + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits29(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 21); + + *ptr++ = static_cast(values[0] >> 13); + + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 26); + + *ptr++ = static_cast(values[1] >> 18); + + *ptr++ = static_cast(values[1] >> 10); + + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 23); + + *ptr++ = static_cast(values[2] >> 15); + + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 28); + + *ptr++ = static_cast(values[3] >> 20); + + *ptr++ = static_cast(values[3] >> 12); + + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 25); + + *ptr++ = static_cast(values[4] >> 17); + + *ptr++ = static_cast(values[4] >> 9); + + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 22); + + *ptr++ = static_cast(values[5] >> 14); + + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 27); + + *ptr++ = static_cast(values[6] >> 19); + + *ptr++ = static_cast(values[6] >> 11); + + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 24); + + *ptr++ = static_cast(values[7] >> 16); + + *ptr++ = static_cast(values[7] >> 8); + + *ptr = 
static_cast(values[7]); +} + +static inline void packBits30(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 22); + *ptr++ = static_cast(values[0] >> 14); + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 26); + *ptr++ = static_cast(values[2] >> 18); + *ptr++ = static_cast(values[2] >> 10); + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 22); + *ptr++ = static_cast(values[4] >> 14); + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 26); + *ptr++ = static_cast(values[6] >> 18); + *ptr++ = static_cast(values[6] >> 10); + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits31(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 23); + *ptr++ = static_cast(values[0] >> 15); + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + 
*ptr++ |= static_cast(values[2] >> 29); + *ptr++ = static_cast(values[2] >> 21); + *ptr++ = static_cast(values[2] >> 13); + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 27); + *ptr++ = static_cast(values[4] >> 19); + *ptr++ = static_cast(values[4] >> 11); + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 25); + *ptr++ = static_cast(values[6] >> 17); + *ptr++ = static_cast(values[6] >> 9); + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits32(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 24); + *ptr++ = static_cast(values[0] >> 16); + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 24); + *ptr++ = static_cast(values[2] >> 16); + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 24); + *ptr++ = static_cast(values[4] >> 16); + *ptr++ = static_cast(values[4] 
>> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 24); + *ptr++ = static_cast(values[6] >> 16); + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits33(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 25); + *ptr++ = static_cast(values[0] >> 17); + *ptr++ = static_cast(values[0] >> 9); + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 27); + *ptr++ = static_cast(values[2] >> 19); + *ptr++ = static_cast(values[2] >> 11); + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 29); + *ptr++ = static_cast(values[4] >> 21); + *ptr++ = static_cast(values[4] >> 13); + *ptr++ = static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 31); + *ptr++ = static_cast(values[6] >> 23); + *ptr++ = static_cast(values[6] >> 15); + *ptr++ = static_cast(values[6] >> 7); + + *ptr = 
static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits34(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 26); + *ptr++ = static_cast(values[0] >> 18); + *ptr++ = static_cast(values[0] >> 10); + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 30); + *ptr++ = static_cast(values[2] >> 22); + *ptr++ = static_cast(values[2] >> 14); + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 26); + *ptr++ = static_cast(values[4] >> 18); + *ptr++ = static_cast(values[4] >> 10); + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 30); + *ptr++ = static_cast(values[6] >> 22); + *ptr++ = static_cast(values[6] >> 14); + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits35(const uint64_t* values, uint8_t* ptr) { + 
*ptr++ = static_cast(values[0] >> 27); + *ptr++ = static_cast(values[0] >> 19); + *ptr++ = static_cast(values[0] >> 11); + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 33); + *ptr++ = static_cast(values[2] >> 25); + *ptr++ = static_cast(values[2] >> 17); + *ptr++ = static_cast(values[2] >> 9); + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 31); + *ptr++ = static_cast(values[4] >> 23); + *ptr++ = static_cast(values[4] >> 15); + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 29); + *ptr++ = static_cast(values[6] >> 21); + *ptr++ = static_cast(values[6] >> 13); + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits36(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 28); + *ptr++ = static_cast(values[0] >> 20); + *ptr++ = static_cast(values[0] >> 12); + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= 
static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 28); + *ptr++ = static_cast(values[2] >> 20); + *ptr++ = static_cast(values[2] >> 12); + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 28); + *ptr++ = static_cast(values[4] >> 20); + *ptr++ = static_cast(values[4] >> 12); + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 28); + *ptr++ = static_cast(values[6] >> 20); + *ptr++ = static_cast(values[6] >> 12); + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits37(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 29); + *ptr++ = static_cast(values[0] >> 21); + *ptr++ = static_cast(values[0] >> 13); + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 31); + *ptr++ = static_cast(values[2] >> 
23); + *ptr++ = static_cast(values[2] >> 15); + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 33); + *ptr++ = static_cast(values[4] >> 25); + *ptr++ = static_cast(values[4] >> 17); + *ptr++ = static_cast(values[4] >> 9); + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 35); + *ptr++ = static_cast(values[6] >> 27); + *ptr++ = static_cast(values[6] >> 19); + *ptr++ = static_cast(values[6] >> 11); + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits38(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 30); + *ptr++ = static_cast(values[0] >> 22); + *ptr++ = static_cast(values[0] >> 14); + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 34); + *ptr++ = static_cast(values[2] >> 26); + *ptr++ = static_cast(values[2] >> 18); + *ptr++ = static_cast(values[2] >> 10); + *ptr++ = static_cast(values[2] >> 2); + + *ptr 
= static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 30); + *ptr++ = static_cast(values[4] >> 22); + *ptr++ = static_cast(values[4] >> 14); + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 34); + *ptr++ = static_cast(values[6] >> 26); + *ptr++ = static_cast(values[6] >> 18); + *ptr++ = static_cast(values[6] >> 10); + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits39(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 31); + *ptr++ = static_cast(values[0] >> 23); + *ptr++ = static_cast(values[0] >> 15); + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 37); + *ptr++ = static_cast(values[2] >> 29); + *ptr++ = static_cast(values[2] >> 21); + *ptr++ = static_cast(values[2] >> 13); + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = 
static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 35); + *ptr++ = static_cast(values[4] >> 27); + *ptr++ = static_cast(values[4] >> 19); + *ptr++ = static_cast(values[4] >> 11); + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 33); + *ptr++ = static_cast(values[6] >> 25); + *ptr++ = static_cast(values[6] >> 17); + *ptr++ = static_cast(values[6] >> 9); + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits40(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 32); + *ptr++ = static_cast(values[0] >> 24); + *ptr++ = static_cast(values[0] >> 16); + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 32); + *ptr++ = static_cast(values[2] >> 24); + *ptr++ = static_cast(values[2] >> 16); + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 32); 
+ *ptr++ = static_cast(values[4] >> 24); + *ptr++ = static_cast(values[4] >> 16); + *ptr++ = static_cast(values[4] >> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 32); + *ptr++ = static_cast(values[6] >> 24); + *ptr++ = static_cast(values[6] >> 16); + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits41(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 33); + *ptr++ = static_cast(values[0] >> 25); + *ptr++ = static_cast(values[0] >> 17); + *ptr++ = static_cast(values[0] >> 9); + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 35); + *ptr++ = static_cast(values[2] >> 27); + *ptr++ = static_cast(values[2] >> 19); + *ptr++ = static_cast(values[2] >> 11); + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 37); + *ptr++ = static_cast(values[4] >> 29); + *ptr++ = static_cast(values[4] >> 21); + *ptr++ = static_cast(values[4] >> 13); + *ptr++ = 
static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 39); + *ptr++ = static_cast(values[6] >> 31); + *ptr++ = static_cast(values[6] >> 23); + *ptr++ = static_cast(values[6] >> 15); + *ptr++ = static_cast(values[6] >> 7); + + *ptr = static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits42(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 34); + *ptr++ = static_cast(values[0] >> 26); + *ptr++ = static_cast(values[0] >> 18); + *ptr++ = static_cast(values[0] >> 10); + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 38); + *ptr++ = static_cast(values[2] >> 30); + *ptr++ = static_cast(values[2] >> 22); + *ptr++ = static_cast(values[2] >> 14); + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 34); + *ptr++ = static_cast(values[4] >> 26); + *ptr++ = static_cast(values[4] >> 18); + *ptr++ = 
static_cast(values[4] >> 10); + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 38); + *ptr++ = static_cast(values[6] >> 30); + *ptr++ = static_cast(values[6] >> 22); + *ptr++ = static_cast(values[6] >> 14); + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits43(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 35); + *ptr++ = static_cast(values[0] >> 27); + *ptr++ = static_cast(values[0] >> 19); + *ptr++ = static_cast(values[0] >> 11); + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 41); + *ptr++ = static_cast(values[2] >> 33); + *ptr++ = static_cast(values[2] >> 25); + *ptr++ = static_cast(values[2] >> 17); + *ptr++ = static_cast(values[2] >> 9); + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 39); + *ptr++ = 
static_cast(values[4] >> 31); + *ptr++ = static_cast(values[4] >> 23); + *ptr++ = static_cast(values[4] >> 15); + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 37); + *ptr++ = static_cast(values[6] >> 29); + *ptr++ = static_cast(values[6] >> 21); + *ptr++ = static_cast(values[6] >> 13); + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits44(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 36); + *ptr++ = static_cast(values[0] >> 28); + *ptr++ = static_cast(values[0] >> 20); + *ptr++ = static_cast(values[0] >> 12); + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 40); + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 36); + *ptr++ = static_cast(values[2] >> 28); + *ptr++ = static_cast(values[2] >> 20); + *ptr++ = static_cast(values[2] >> 12); + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = 
static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 36); + *ptr++ = static_cast(values[4] >> 28); + *ptr++ = static_cast(values[4] >> 20); + *ptr++ = static_cast(values[4] >> 12); + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 40); + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 36); + *ptr++ = static_cast(values[6] >> 28); + *ptr++ = static_cast(values[6] >> 20); + *ptr++ = static_cast(values[6] >> 12); + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits45(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 37); + *ptr++ = static_cast(values[0] >> 29); + *ptr++ = static_cast(values[0] >> 21); + *ptr++ = static_cast(values[0] >> 13); + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 42); + *ptr++ = static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 39); + *ptr++ = static_cast(values[2] >> 31); + *ptr++ = static_cast(values[2] >> 23); + *ptr++ = static_cast(values[2] >> 15); + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = 
static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 41); + *ptr++ = static_cast(values[4] >> 33); + *ptr++ = static_cast(values[4] >> 25); + *ptr++ = static_cast(values[4] >> 17); + *ptr++ = static_cast(values[4] >> 9); + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 43); + *ptr++ = static_cast(values[6] >> 35); + *ptr++ = static_cast(values[6] >> 27); + *ptr++ = static_cast(values[6] >> 19); + *ptr++ = static_cast(values[6] >> 11); + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits46(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 38); + *ptr++ = static_cast(values[0] >> 30); + *ptr++ = static_cast(values[0] >> 22); + *ptr++ = static_cast(values[0] >> 14); + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 44); + *ptr++ = static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 42); + *ptr++ = static_cast(values[2] >> 34); + *ptr++ = static_cast(values[2] >> 26); + *ptr++ = static_cast(values[2] >> 18); + *ptr++ = 
static_cast(values[2] >> 10); + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 38); + *ptr++ = static_cast(values[4] >> 30); + *ptr++ = static_cast(values[4] >> 22); + *ptr++ = static_cast(values[4] >> 14); + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 44); + *ptr++ = static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 42); + *ptr++ = static_cast(values[6] >> 34); + *ptr++ = static_cast(values[6] >> 26); + *ptr++ = static_cast(values[6] >> 18); + *ptr++ = static_cast(values[6] >> 10); + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits47(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 39); + *ptr++ = static_cast(values[0] >> 31); + *ptr++ = static_cast(values[0] >> 23); + *ptr++ = static_cast(values[0] >> 15); + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 46); + *ptr++ = static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = 
static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 45); + *ptr++ = static_cast(values[2] >> 37); + *ptr++ = static_cast(values[2] >> 29); + *ptr++ = static_cast(values[2] >> 21); + *ptr++ = static_cast(values[2] >> 13); + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 43); + *ptr++ = static_cast(values[4] >> 35); + *ptr++ = static_cast(values[4] >> 27); + *ptr++ = static_cast(values[4] >> 19); + *ptr++ = static_cast(values[4] >> 11); + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 41); + *ptr++ = static_cast(values[6] >> 33); + *ptr++ = static_cast(values[6] >> 25); + *ptr++ = static_cast(values[6] >> 17); + *ptr++ = static_cast(values[6] >> 9); + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits48(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 40); + *ptr++ = static_cast(values[0] >> 32); + *ptr++ = static_cast(values[0] >> 24); + *ptr++ = static_cast(values[0] >> 16); + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = 
static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 40); + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 40); + *ptr++ = static_cast(values[2] >> 32); + *ptr++ = static_cast(values[2] >> 24); + *ptr++ = static_cast(values[2] >> 16); + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 40); + *ptr++ = static_cast(values[4] >> 32); + *ptr++ = static_cast(values[4] >> 24); + *ptr++ = static_cast(values[4] >> 16); + *ptr++ = static_cast(values[4] >> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 40); + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 40); + *ptr++ = static_cast(values[6] >> 32); + *ptr++ = static_cast(values[6] >> 24); + *ptr++ = static_cast(values[6] >> 16); + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits49(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 41); + *ptr++ = static_cast(values[0] >> 33); + *ptr++ = static_cast(values[0] >> 25); + *ptr++ = static_cast(values[0] >> 17); + *ptr++ = static_cast(values[0] >> 9); + *ptr++ = 
static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 42); + *ptr++ = static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 43); + *ptr++ = static_cast(values[2] >> 35); + *ptr++ = static_cast(values[2] >> 27); + *ptr++ = static_cast(values[2] >> 19); + *ptr++ = static_cast(values[2] >> 11); + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 45); + *ptr++ = static_cast(values[4] >> 37); + *ptr++ = static_cast(values[4] >> 29); + *ptr++ = static_cast(values[4] >> 21); + *ptr++ = static_cast(values[4] >> 13); + *ptr++ = static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 46); + *ptr++ = static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 47); + *ptr++ = static_cast(values[6] >> 39); + *ptr++ = static_cast(values[6] >> 31); + *ptr++ = static_cast(values[6] >> 23); + *ptr++ = static_cast(values[6] >> 15); + *ptr++ = static_cast(values[6] >> 7); + + *ptr = static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ 
= static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits50(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 42); + *ptr++ = static_cast(values[0] >> 34); + *ptr++ = static_cast(values[0] >> 26); + *ptr++ = static_cast(values[0] >> 18); + *ptr++ = static_cast(values[0] >> 10); + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 44); + *ptr++ = static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 46); + *ptr++ = static_cast(values[2] >> 38); + *ptr++ = static_cast(values[2] >> 30); + *ptr++ = static_cast(values[2] >> 22); + *ptr++ = static_cast(values[2] >> 14); + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 42); + *ptr++ = static_cast(values[4] >> 34); + *ptr++ = static_cast(values[4] >> 26); + *ptr++ = static_cast(values[4] >> 18); + *ptr++ = static_cast(values[4] >> 10); + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 44); + *ptr++ = static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 46); + *ptr++ = static_cast(values[6] >> 38); + *ptr++ = static_cast(values[6] >> 30); + *ptr++ = 
static_cast(values[6] >> 22); + *ptr++ = static_cast(values[6] >> 14); + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits51(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 43); + *ptr++ = static_cast(values[0] >> 35); + *ptr++ = static_cast(values[0] >> 27); + *ptr++ = static_cast(values[0] >> 19); + *ptr++ = static_cast(values[0] >> 11); + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 46); + *ptr++ = static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 49); + *ptr++ = static_cast(values[2] >> 41); + *ptr++ = static_cast(values[2] >> 33); + *ptr++ = static_cast(values[2] >> 25); + *ptr++ = static_cast(values[2] >> 17); + *ptr++ = static_cast(values[2] >> 9); + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 47); + *ptr++ = static_cast(values[4] >> 39); + *ptr++ = static_cast(values[4] >> 31); + *ptr++ = static_cast(values[4] >> 23); + *ptr++ = static_cast(values[4] >> 15); + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= 
static_cast(values[5] >> 50); + *ptr++ = static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 45); + *ptr++ = static_cast(values[6] >> 37); + *ptr++ = static_cast(values[6] >> 29); + *ptr++ = static_cast(values[6] >> 21); + *ptr++ = static_cast(values[6] >> 13); + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits52(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 44); + *ptr++ = static_cast(values[0] >> 36); + *ptr++ = static_cast(values[0] >> 28); + *ptr++ = static_cast(values[0] >> 20); + *ptr++ = static_cast(values[0] >> 12); + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 48); + *ptr++ = static_cast(values[1] >> 40); + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 44); + *ptr++ = static_cast(values[2] >> 36); + *ptr++ = static_cast(values[2] >> 28); + *ptr++ = static_cast(values[2] >> 20); + *ptr++ = static_cast(values[2] >> 12); + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = 
static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 44); + *ptr++ = static_cast(values[4] >> 36); + *ptr++ = static_cast(values[4] >> 28); + *ptr++ = static_cast(values[4] >> 20); + *ptr++ = static_cast(values[4] >> 12); + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 48); + *ptr++ = static_cast(values[5] >> 40); + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 44); + *ptr++ = static_cast(values[6] >> 36); + *ptr++ = static_cast(values[6] >> 28); + *ptr++ = static_cast(values[6] >> 20); + *ptr++ = static_cast(values[6] >> 12); + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits53(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 45); + *ptr++ = static_cast(values[0] >> 37); + *ptr++ = static_cast(values[0] >> 29); + *ptr++ = static_cast(values[0] >> 21); + *ptr++ = static_cast(values[0] >> 13); + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 50); + *ptr++ = static_cast(values[1] >> 42); + *ptr++ = static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 47); + *ptr++ = 
static_cast(values[2] >> 39); + *ptr++ = static_cast(values[2] >> 31); + *ptr++ = static_cast(values[2] >> 23); + *ptr++ = static_cast(values[2] >> 15); + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 49); + *ptr++ = static_cast(values[4] >> 41); + *ptr++ = static_cast(values[4] >> 33); + *ptr++ = static_cast(values[4] >> 25); + *ptr++ = static_cast(values[4] >> 17); + *ptr++ = static_cast(values[4] >> 9); + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 46); + *ptr++ = static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 51); + *ptr++ = static_cast(values[6] >> 43); + *ptr++ = static_cast(values[6] >> 35); + *ptr++ = static_cast(values[6] >> 27); + *ptr++ = static_cast(values[6] >> 19); + *ptr++ = static_cast(values[6] >> 11); + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits54(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 46); + *ptr++ = static_cast(values[0] >> 38); + *ptr++ = static_cast(values[0] >> 30); + *ptr++ = 
static_cast(values[0] >> 22); + *ptr++ = static_cast(values[0] >> 14); + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 52); + *ptr++ = static_cast(values[1] >> 44); + *ptr++ = static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 50); + *ptr++ = static_cast(values[2] >> 42); + *ptr++ = static_cast(values[2] >> 34); + *ptr++ = static_cast(values[2] >> 26); + *ptr++ = static_cast(values[2] >> 18); + *ptr++ = static_cast(values[2] >> 10); + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 46); + *ptr++ = static_cast(values[4] >> 38); + *ptr++ = static_cast(values[4] >> 30); + *ptr++ = static_cast(values[4] >> 22); + *ptr++ = static_cast(values[4] >> 14); + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 52); + *ptr++ = static_cast(values[5] >> 44); + *ptr++ = static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 50); + *ptr++ = static_cast(values[6] >> 42); + *ptr++ = static_cast(values[6] >> 34); + *ptr++ = static_cast(values[6] >> 26); + *ptr++ = static_cast(values[6] >> 18); + *ptr++ = static_cast(values[6] >> 10); + *ptr++ = static_cast(values[6] >> 2); + + *ptr = 
static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits55(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 47); + *ptr++ = static_cast(values[0] >> 39); + *ptr++ = static_cast(values[0] >> 31); + *ptr++ = static_cast(values[0] >> 23); + *ptr++ = static_cast(values[0] >> 15); + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 54); + *ptr++ = static_cast(values[1] >> 46); + *ptr++ = static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 53); + *ptr++ = static_cast(values[2] >> 45); + *ptr++ = static_cast(values[2] >> 37); + *ptr++ = static_cast(values[2] >> 29); + *ptr++ = static_cast(values[2] >> 21); + *ptr++ = static_cast(values[2] >> 13); + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 51); + *ptr++ = static_cast(values[4] >> 43); + *ptr++ = static_cast(values[4] >> 35); + *ptr++ = static_cast(values[4] >> 27); + *ptr++ = static_cast(values[4] >> 19); + *ptr++ = static_cast(values[4] >> 11); + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= 
static_cast(values[5] >> 50); + *ptr++ = static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 49); + *ptr++ = static_cast(values[6] >> 41); + *ptr++ = static_cast(values[6] >> 33); + *ptr++ = static_cast(values[6] >> 25); + *ptr++ = static_cast(values[6] >> 17); + *ptr++ = static_cast(values[6] >> 9); + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits56(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 48); + *ptr++ = static_cast(values[0] >> 40); + *ptr++ = static_cast(values[0] >> 32); + *ptr++ = static_cast(values[0] >> 24); + *ptr++ = static_cast(values[0] >> 16); + *ptr++ = static_cast(values[0] >> 8); + *ptr++ = static_cast(values[0]); + + *ptr++ = static_cast(values[1] >> 48); + *ptr++ = static_cast(values[1] >> 40); + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 48); + *ptr++ = static_cast(values[2] >> 40); + *ptr++ = static_cast(values[2] >> 32); + *ptr++ = static_cast(values[2] >> 24); + *ptr++ = static_cast(values[2] >> 16); + *ptr++ = static_cast(values[2] >> 8); + *ptr++ = static_cast(values[2]); + + *ptr++ = static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] 
>> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 48); + *ptr++ = static_cast(values[4] >> 40); + *ptr++ = static_cast(values[4] >> 32); + *ptr++ = static_cast(values[4] >> 24); + *ptr++ = static_cast(values[4] >> 16); + *ptr++ = static_cast(values[4] >> 8); + *ptr++ = static_cast(values[4]); + + *ptr++ = static_cast(values[5] >> 48); + *ptr++ = static_cast(values[5] >> 40); + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 48); + *ptr++ = static_cast(values[6] >> 40); + *ptr++ = static_cast(values[6] >> 32); + *ptr++ = static_cast(values[6] >> 24); + *ptr++ = static_cast(values[6] >> 16); + *ptr++ = static_cast(values[6] >> 8); + *ptr++ = static_cast(values[6]); + + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits57(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 49); + *ptr++ = static_cast(values[0] >> 41); + *ptr++ = static_cast(values[0] >> 33); + *ptr++ = static_cast(values[0] >> 25); + *ptr++ = static_cast(values[0] >> 17); + *ptr++ = static_cast(values[0] >> 9); + *ptr++ = static_cast(values[0] >> 1); + + *ptr = static_cast(values[0] << 7); + *ptr++ |= static_cast(values[1] >> 50); + *ptr++ = static_cast(values[1] >> 42); + *ptr++ = static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= 
static_cast(values[2] >> 51); + *ptr++ = static_cast(values[2] >> 43); + *ptr++ = static_cast(values[2] >> 35); + *ptr++ = static_cast(values[2] >> 27); + *ptr++ = static_cast(values[2] >> 19); + *ptr++ = static_cast(values[2] >> 11); + *ptr++ = static_cast(values[2] >> 3); + + *ptr = static_cast(values[2] << 5); + *ptr++ |= static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 53); + *ptr++ = static_cast(values[4] >> 45); + *ptr++ = static_cast(values[4] >> 37); + *ptr++ = static_cast(values[4] >> 29); + *ptr++ = static_cast(values[4] >> 21); + *ptr++ = static_cast(values[4] >> 13); + *ptr++ = static_cast(values[4] >> 5); + + *ptr = static_cast(values[4] << 3); + *ptr++ |= static_cast(values[5] >> 54); + *ptr++ = static_cast(values[5] >> 46); + *ptr++ = static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 55); + *ptr++ = static_cast(values[6] >> 47); + *ptr++ = static_cast(values[6] >> 39); + *ptr++ = static_cast(values[6] >> 31); + *ptr++ = static_cast(values[6] >> 23); + *ptr++ = static_cast(values[6] >> 15); + *ptr++ = static_cast(values[6] >> 7); + + *ptr = static_cast(values[6] << 1); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits58(const uint64_t* 
values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 50); + *ptr++ = static_cast(values[0] >> 42); + *ptr++ = static_cast(values[0] >> 34); + *ptr++ = static_cast(values[0] >> 26); + *ptr++ = static_cast(values[0] >> 18); + *ptr++ = static_cast(values[0] >> 10); + *ptr++ = static_cast(values[0] >> 2); + + *ptr = static_cast(values[0] << 6); + *ptr++ |= static_cast(values[1] >> 52); + *ptr++ = static_cast(values[1] >> 44); + *ptr++ = static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 54); + *ptr++ = static_cast(values[2] >> 46); + *ptr++ = static_cast(values[2] >> 38); + *ptr++ = static_cast(values[2] >> 30); + *ptr++ = static_cast(values[2] >> 22); + *ptr++ = static_cast(values[2] >> 14); + *ptr++ = static_cast(values[2] >> 6); + + *ptr = static_cast(values[2] << 2); + *ptr++ |= static_cast(values[3] >> 56); + *ptr++ = static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 50); + *ptr++ = static_cast(values[4] >> 42); + *ptr++ = static_cast(values[4] >> 34); + *ptr++ = static_cast(values[4] >> 26); + *ptr++ = static_cast(values[4] >> 18); + *ptr++ = static_cast(values[4] >> 10); + *ptr++ = static_cast(values[4] >> 2); + + *ptr = static_cast(values[4] << 6); + *ptr++ |= static_cast(values[5] >> 52); + *ptr++ = static_cast(values[5] >> 44); + *ptr++ = static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= 
static_cast(values[6] >> 54); + *ptr++ = static_cast(values[6] >> 46); + *ptr++ = static_cast(values[6] >> 38); + *ptr++ = static_cast(values[6] >> 30); + *ptr++ = static_cast(values[6] >> 22); + *ptr++ = static_cast(values[6] >> 14); + *ptr++ = static_cast(values[6] >> 6); + + *ptr = static_cast(values[6] << 2); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits59(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 51); + *ptr++ = static_cast(values[0] >> 43); + *ptr++ = static_cast(values[0] >> 35); + *ptr++ = static_cast(values[0] >> 27); + *ptr++ = static_cast(values[0] >> 19); + *ptr++ = static_cast(values[0] >> 11); + *ptr++ = static_cast(values[0] >> 3); + + *ptr = static_cast(values[0] << 5); + *ptr++ |= static_cast(values[1] >> 54); + *ptr++ = static_cast(values[1] >> 46); + *ptr++ = static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 57); + *ptr++ = static_cast(values[2] >> 49); + *ptr++ = static_cast(values[2] >> 41); + *ptr++ = static_cast(values[2] >> 33); + *ptr++ = static_cast(values[2] >> 25); + *ptr++ = static_cast(values[2] >> 17); + *ptr++ = static_cast(values[2] >> 9); + *ptr++ = static_cast(values[2] >> 1); + + *ptr = static_cast(values[2] << 7); + *ptr++ |= static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = 
static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 55); + *ptr++ = static_cast(values[4] >> 47); + *ptr++ = static_cast(values[4] >> 39); + *ptr++ = static_cast(values[4] >> 31); + *ptr++ = static_cast(values[4] >> 23); + *ptr++ = static_cast(values[4] >> 15); + *ptr++ = static_cast(values[4] >> 7); + + *ptr = static_cast(values[4] << 1); + *ptr++ |= static_cast(values[5] >> 58); + *ptr++ = static_cast(values[5] >> 50); + *ptr++ = static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + *ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 53); + *ptr++ = static_cast(values[6] >> 45); + *ptr++ = static_cast(values[6] >> 37); + *ptr++ = static_cast(values[6] >> 29); + *ptr++ = static_cast(values[6] >> 21); + *ptr++ = static_cast(values[6] >> 13); + *ptr++ = static_cast(values[6] >> 5); + + *ptr = static_cast(values[6] << 3); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits60(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 52); + *ptr++ = static_cast(values[0] >> 44); + *ptr++ = static_cast(values[0] >> 36); + *ptr++ = static_cast(values[0] >> 28); + *ptr++ = static_cast(values[0] >> 20); + *ptr++ = static_cast(values[0] >> 12); + *ptr++ = static_cast(values[0] >> 4); + + *ptr = static_cast(values[0] << 4); + *ptr++ |= static_cast(values[1] >> 56); + *ptr++ = static_cast(values[1] >> 48); + *ptr++ = static_cast(values[1] >> 40); + *ptr++ = static_cast(values[1] >> 32); + *ptr++ = 
static_cast(values[1] >> 24); + *ptr++ = static_cast(values[1] >> 16); + *ptr++ = static_cast(values[1] >> 8); + *ptr++ = static_cast(values[1]); + + *ptr++ = static_cast(values[2] >> 52); + *ptr++ = static_cast(values[2] >> 44); + *ptr++ = static_cast(values[2] >> 36); + *ptr++ = static_cast(values[2] >> 28); + *ptr++ = static_cast(values[2] >> 20); + *ptr++ = static_cast(values[2] >> 12); + *ptr++ = static_cast(values[2] >> 4); + + *ptr = static_cast(values[2] << 4); + *ptr++ |= static_cast(values[3] >> 56); + *ptr++ = static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 52); + *ptr++ = static_cast(values[4] >> 44); + *ptr++ = static_cast(values[4] >> 36); + *ptr++ = static_cast(values[4] >> 28); + *ptr++ = static_cast(values[4] >> 20); + *ptr++ = static_cast(values[4] >> 12); + *ptr++ = static_cast(values[4] >> 4); + + *ptr = static_cast(values[4] << 4); + *ptr++ |= static_cast(values[5] >> 56); + *ptr++ = static_cast(values[5] >> 48); + *ptr++ = static_cast(values[5] >> 40); + *ptr++ = static_cast(values[5] >> 32); + *ptr++ = static_cast(values[5] >> 24); + *ptr++ = static_cast(values[5] >> 16); + *ptr++ = static_cast(values[5] >> 8); + *ptr++ = static_cast(values[5]); + + *ptr++ = static_cast(values[6] >> 52); + *ptr++ = static_cast(values[6] >> 44); + *ptr++ = static_cast(values[6] >> 36); + *ptr++ = static_cast(values[6] >> 28); + *ptr++ = static_cast(values[6] >> 20); + *ptr++ = static_cast(values[6] >> 12); + *ptr++ = static_cast(values[6] >> 4); + + *ptr = static_cast(values[6] << 4); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = 
static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits61(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 53); + *ptr++ = static_cast(values[0] >> 45); + *ptr++ = static_cast(values[0] >> 37); + *ptr++ = static_cast(values[0] >> 29); + *ptr++ = static_cast(values[0] >> 21); + *ptr++ = static_cast(values[0] >> 13); + *ptr++ = static_cast(values[0] >> 5); + + *ptr = static_cast(values[0] << 3); + *ptr++ |= static_cast(values[1] >> 58); + *ptr++ = static_cast(values[1] >> 50); + *ptr++ = static_cast(values[1] >> 42); + *ptr++ = static_cast(values[1] >> 34); + *ptr++ = static_cast(values[1] >> 26); + *ptr++ = static_cast(values[1] >> 18); + *ptr++ = static_cast(values[1] >> 10); + *ptr++ = static_cast(values[1] >> 2); + + *ptr = static_cast(values[1] << 6); + *ptr++ |= static_cast(values[2] >> 55); + *ptr++ = static_cast(values[2] >> 47); + *ptr++ = static_cast(values[2] >> 39); + *ptr++ = static_cast(values[2] >> 31); + *ptr++ = static_cast(values[2] >> 23); + *ptr++ = static_cast(values[2] >> 15); + *ptr++ = static_cast(values[2] >> 7); + + *ptr = static_cast(values[2] << 1); + *ptr++ |= static_cast(values[3] >> 60); + *ptr++ = static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 57); + *ptr++ = static_cast(values[4] >> 49); + *ptr++ = static_cast(values[4] >> 41); + *ptr++ = static_cast(values[4] >> 33); + *ptr++ = static_cast(values[4] >> 25); + *ptr++ = static_cast(values[4] >> 17); + *ptr++ = static_cast(values[4] >> 9); + *ptr++ = static_cast(values[4] >> 1); + + *ptr = static_cast(values[4] << 7); + *ptr++ |= static_cast(values[5] >> 54); + *ptr++ = 
static_cast(values[5] >> 46); + *ptr++ = static_cast(values[5] >> 38); + *ptr++ = static_cast(values[5] >> 30); + *ptr++ = static_cast(values[5] >> 22); + *ptr++ = static_cast(values[5] >> 14); + *ptr++ = static_cast(values[5] >> 6); + + *ptr = static_cast(values[5] << 2); + *ptr++ |= static_cast(values[6] >> 59); + *ptr++ = static_cast(values[6] >> 51); + *ptr++ = static_cast(values[6] >> 43); + *ptr++ = static_cast(values[6] >> 35); + *ptr++ = static_cast(values[6] >> 27); + *ptr++ = static_cast(values[6] >> 19); + *ptr++ = static_cast(values[6] >> 11); + *ptr++ = static_cast(values[6] >> 3); + + *ptr = static_cast(values[6] << 5); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits62(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 54); + *ptr++ = static_cast(values[0] >> 46); + *ptr++ = static_cast(values[0] >> 38); + *ptr++ = static_cast(values[0] >> 30); + *ptr++ = static_cast(values[0] >> 22); + *ptr++ = static_cast(values[0] >> 14); + *ptr++ = static_cast(values[0] >> 6); + + *ptr = static_cast(values[0] << 2); + *ptr++ |= static_cast(values[1] >> 60); + *ptr++ = static_cast(values[1] >> 52); + *ptr++ = static_cast(values[1] >> 44); + *ptr++ = static_cast(values[1] >> 36); + *ptr++ = static_cast(values[1] >> 28); + *ptr++ = static_cast(values[1] >> 20); + *ptr++ = static_cast(values[1] >> 12); + *ptr++ = static_cast(values[1] >> 4); + + *ptr = static_cast(values[1] << 4); + *ptr++ |= static_cast(values[2] >> 58); + *ptr++ = static_cast(values[2] >> 50); + *ptr++ = static_cast(values[2] >> 42); + *ptr++ = static_cast(values[2] >> 34); + *ptr++ = static_cast(values[2] >> 26); + *ptr++ = static_cast(values[2] >> 18); + *ptr++ = 
static_cast(values[2] >> 10); + *ptr++ = static_cast(values[2] >> 2); + + *ptr = static_cast(values[2] << 6); + *ptr++ |= static_cast(values[3] >> 56); + *ptr++ = static_cast(values[3] >> 48); + *ptr++ = static_cast(values[3] >> 40); + *ptr++ = static_cast(values[3] >> 32); + *ptr++ = static_cast(values[3] >> 24); + *ptr++ = static_cast(values[3] >> 16); + *ptr++ = static_cast(values[3] >> 8); + *ptr++ = static_cast(values[3]); + + *ptr++ = static_cast(values[4] >> 54); + *ptr++ = static_cast(values[4] >> 46); + *ptr++ = static_cast(values[4] >> 38); + *ptr++ = static_cast(values[4] >> 30); + *ptr++ = static_cast(values[4] >> 22); + *ptr++ = static_cast(values[4] >> 14); + *ptr++ = static_cast(values[4] >> 6); + + *ptr = static_cast(values[4] << 2); + *ptr++ |= static_cast(values[5] >> 60); + *ptr++ = static_cast(values[5] >> 52); + *ptr++ = static_cast(values[5] >> 44); + *ptr++ = static_cast(values[5] >> 36); + *ptr++ = static_cast(values[5] >> 28); + *ptr++ = static_cast(values[5] >> 20); + *ptr++ = static_cast(values[5] >> 12); + *ptr++ = static_cast(values[5] >> 4); + + *ptr = static_cast(values[5] << 4); + *ptr++ |= static_cast(values[6] >> 58); + *ptr++ = static_cast(values[6] >> 50); + *ptr++ = static_cast(values[6] >> 42); + *ptr++ = static_cast(values[6] >> 34); + *ptr++ = static_cast(values[6] >> 26); + *ptr++ = static_cast(values[6] >> 18); + *ptr++ = static_cast(values[6] >> 10); + *ptr++ = static_cast(values[6] >> 2); + + *ptr = static_cast(values[6] << 6); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void packBits63(const uint64_t* values, uint8_t* ptr) { + *ptr++ = static_cast(values[0] >> 55); + *ptr++ = static_cast(values[0] >> 47); + *ptr++ = 
static_cast(values[0] >> 39); + *ptr++ = static_cast(values[0] >> 31); + *ptr++ = static_cast(values[0] >> 23); + *ptr++ = static_cast(values[0] >> 15); + *ptr++ = static_cast(values[0] >> 7); + + *ptr = static_cast(values[0] << 1); + *ptr++ |= static_cast(values[1] >> 62); + *ptr++ = static_cast(values[1] >> 54); + *ptr++ = static_cast(values[1] >> 46); + *ptr++ = static_cast(values[1] >> 38); + *ptr++ = static_cast(values[1] >> 30); + *ptr++ = static_cast(values[1] >> 22); + *ptr++ = static_cast(values[1] >> 14); + *ptr++ = static_cast(values[1] >> 6); + + *ptr = static_cast(values[1] << 2); + *ptr++ |= static_cast(values[2] >> 61); + *ptr++ = static_cast(values[2] >> 53); + *ptr++ = static_cast(values[2] >> 45); + *ptr++ = static_cast(values[2] >> 37); + *ptr++ = static_cast(values[2] >> 29); + *ptr++ = static_cast(values[2] >> 21); + *ptr++ = static_cast(values[2] >> 13); + *ptr++ = static_cast(values[2] >> 5); + + *ptr = static_cast(values[2] << 3); + *ptr++ |= static_cast(values[3] >> 60); + *ptr++ = static_cast(values[3] >> 52); + *ptr++ = static_cast(values[3] >> 44); + *ptr++ = static_cast(values[3] >> 36); + *ptr++ = static_cast(values[3] >> 28); + *ptr++ = static_cast(values[3] >> 20); + *ptr++ = static_cast(values[3] >> 12); + *ptr++ = static_cast(values[3] >> 4); + + *ptr = static_cast(values[3] << 4); + *ptr++ |= static_cast(values[4] >> 59); + *ptr++ = static_cast(values[4] >> 51); + *ptr++ = static_cast(values[4] >> 43); + *ptr++ = static_cast(values[4] >> 35); + *ptr++ = static_cast(values[4] >> 27); + *ptr++ = static_cast(values[4] >> 19); + *ptr++ = static_cast(values[4] >> 11); + *ptr++ = static_cast(values[4] >> 3); + + *ptr = static_cast(values[4] << 5); + *ptr++ |= static_cast(values[5] >> 58); + *ptr++ = static_cast(values[5] >> 50); + *ptr++ = static_cast(values[5] >> 42); + *ptr++ = static_cast(values[5] >> 34); + *ptr++ = static_cast(values[5] >> 26); + *ptr++ = static_cast(values[5] >> 18); + *ptr++ = static_cast(values[5] >> 10); + 
*ptr++ = static_cast(values[5] >> 2); + + *ptr = static_cast(values[5] << 6); + *ptr++ |= static_cast(values[6] >> 57); + *ptr++ = static_cast(values[6] >> 49); + *ptr++ = static_cast(values[6] >> 41); + *ptr++ = static_cast(values[6] >> 33); + *ptr++ = static_cast(values[6] >> 25); + *ptr++ = static_cast(values[6] >> 17); + *ptr++ = static_cast(values[6] >> 9); + *ptr++ = static_cast(values[6] >> 1); + + *ptr = static_cast(values[6] << 7); + *ptr++ |= static_cast(values[7] >> 56); + *ptr++ = static_cast(values[7] >> 48); + *ptr++ = static_cast(values[7] >> 40); + *ptr++ = static_cast(values[7] >> 32); + *ptr++ = static_cast(values[7] >> 24); + *ptr++ = static_cast(values[7] >> 16); + *ptr++ = static_cast(values[7] >> 8); + *ptr = static_cast(values[7]); +} + +static inline void unpackBits1(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 7; + values[1] = (*ptr >> 6) & 1; + values[2] = (*ptr >> 5) & 1; + values[3] = (*ptr >> 4) & 1; + values[4] = (*ptr >> 3) & 1; + values[5] = (*ptr >> 2) & 1; + values[6] = (*ptr >> 1) & 1; + values[7] = *ptr & 1; +} + +static inline void unpackBits2(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 6; + values[1] = (*ptr >> 4) & 3; + values[2] = (*ptr >> 2) & 3; + values[3] = *ptr++ & 3; + values[4] = *ptr >> 6; + values[5] = (*ptr >> 4) & 3; + values[6] = (*ptr >> 2) & 3; + values[7] = *ptr & 3; +} + +static inline void unpackBits3(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 5; + values[1] = (*ptr >> 2) & 7; + values[2] = (*ptr++ & 3) << 1; + values[2] |= *ptr >> 7; + values[3] = (*ptr >> 4) & 7; + values[4] = (*ptr >> 1) & 7; + values[5] = (*ptr++ & 1) << 2; + values[5] |= *ptr >> 6; + values[6] = (*ptr >> 3) & 7; + values[7] = *ptr & 7; +} + +static inline void unpackBits4(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 4; + values[1] = *ptr++ & 0xf; + values[2] = *ptr >> 4; + values[3] = *ptr++ & 0xf; + values[4] = *ptr >> 4; + values[5] = *ptr++ & 0xf; + values[6] = 
*ptr >> 4; + values[7] = *ptr & 0xf; +} +/* unpackBits5: reads the 5 bytes at ptr as eight 5-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits5(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 3; + + values[1] = (*ptr++ & 7) << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr >> 1) & 0x1f; + + values[3] = (*ptr++ & 1) << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 1; + values[4] |= *ptr >> 7; + + values[5] = (*ptr >> 2) & 0x1f; + + values[6] = (*ptr++ & 3) << 3; + values[6] |= *ptr >> 5; + + values[7] = *ptr & 0x1f; +} +/* unpackBits6: reads the 6 bytes at ptr as eight 6-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 3 bytes (4 fields). */ +static inline void unpackBits6(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 2; + + values[1] = (*ptr++ & 3) << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 2; + values[2] |= *ptr >> 6; + + values[3] = *ptr++ & 0x3f; + + values[4] = *ptr >> 2; + + values[5] = (*ptr++ & 3) << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 2; + values[6] |= *ptr >> 6; + + values[7] = *ptr & 0x3f; +} +/* unpackBits7: reads the 7 bytes at ptr as eight 7-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits7(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr >> 1; + + values[1] = (*ptr++ & 1) << 6; + values[1] |= *ptr >> 2; + + values[2] = (*ptr++ & 3) << 5; + values[2] |= *ptr >> 3; + + values[3] = (*ptr++ & 7) << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 3; + values[4] |= *ptr >> 5; + + values[5] = (*ptr++ & 0x1f) << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 1; + values[6] |= *ptr >> 7; + + values[7] = *ptr & 0x7f; +} +/* unpackBits8: copies the 8 bytes at ptr, one byte per field, into values[0..7]. */ +static inline void unpackBits8(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++; + values[1] = *ptr++; + values[2] = *ptr++; + values[3] = *ptr++; + values[4] = *ptr++; + values[5] = *ptr++; + values[6] = *ptr++; + values[7] = *ptr; +} +/* unpackBits9: reads the 9 bytes at ptr as eight 9-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits9(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = (*ptr++ & 0x7f) << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 3; + values[2] |= *ptr >> 5; + + values[3] = (*ptr++ & 0x1f) << 4; + values[3] |= *ptr >> 
4; + + values[4] = (*ptr++ & 0xf) << 5; + values[4] |= *ptr >> 3; + + values[5] = (*ptr++ & 7) << 6; + values[5] |= *ptr >> 2; + + values[6] = (*ptr++ & 3) << 7; + values[6] |= *ptr >> 1; + + values[7] = (*ptr++ & 1) << 8; + values[7] |= *ptr; +} +/* unpackBits10: reads the 10 bytes at ptr as eight 10-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 5 bytes (4 fields). */ +static inline void unpackBits10(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = (*ptr++ & 0x3f) << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 6; + values[2] |= *ptr >> 2; + + values[3] = (*ptr++ & 3) << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = (*ptr++ & 0x3f) << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 6; + values[6] |= *ptr >> 2; + + values[7] = (*ptr++ & 3) << 8; + values[7] |= *ptr; +} +/* unpackBits11: reads the 11 bytes at ptr as eight 11-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits11(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = (*ptr++ & 0x1f) << 6; + values[1] |= *ptr >> 2; + + values[2] = (*ptr++ & 3) << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = (*ptr++ & 0x7f) << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 7; + values[4] |= *ptr >> 1; + + values[5] = (*ptr++ & 1) << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 5; + values[6] |= *ptr >> 3; + + values[7] = (*ptr++ & 7) << 8; + values[7] |= *ptr; +} +/* unpackBits12: reads the 12 bytes at ptr as eight 12-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 3 bytes (2 fields). */ +static inline void unpackBits12(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = (*ptr++ & 0xf) << 8; + values[1] |= *ptr++; + + values[2] = *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = (*ptr++ & 0xf) << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = (*ptr++ & 0xf) << 8; + values[5] |= *ptr++; + + values[6] = *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = (*ptr++ & 0xf) << 8; + values[7] |= *ptr; +} +/* unpackBits13 (the name follows on the next line of this text): reads the 13 bytes at ptr as eight 13-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void 
unpackBits13(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = (*ptr++ & 7) << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 7; + values[2] |= *ptr >> 1; + + values[3] = (*ptr++ & 1) << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = (*ptr++ & 0x7f) << 6; + values[5] |= *ptr >> 2; + + values[6] = (*ptr++ & 3) << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = (*ptr++ & 0x1f) << 8; + values[7] |= *ptr; +} +/* unpackBits14: reads the 14 bytes at ptr as eight 14-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 7 bytes (4 fields). */ +static inline void unpackBits14(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = (*ptr++ & 3) << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = (*ptr++ & 0x3f) << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = (*ptr++ & 3) << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = (*ptr++ & 0x3f) << 8; + values[7] |= *ptr; +} +/* unpackBits15: reads the 15 bytes at ptr as eight 15-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits15(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = (*ptr++ & 1) << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = (*ptr++ & 3) << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = (*ptr++ & 7) << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = (*ptr++ & 0x1f) << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 
7; + + values[7] = (*ptr++ & 0x7f) << 8; + values[7] |= *ptr; +} +/* unpackBits16: reads the 16 bytes at ptr as eight 2-byte fields, high byte first, and stores them in values[0..7]. */ +static inline void unpackBits16(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 8; + values[0] |= *ptr++; + values[1] = *ptr++ << 8; + values[1] |= *ptr++; + values[2] = *ptr++ << 8; + values[2] |= *ptr++; + values[3] = *ptr++ << 8; + values[3] |= *ptr++; + values[4] = *ptr++ << 8; + values[4] |= *ptr++; + values[5] = *ptr++ << 8; + values[5] |= *ptr++; + values[6] = *ptr++ << 8; + values[6] |= *ptr++; + values[7] = *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits17: reads the 17 bytes at ptr as eight 17-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits17(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = (*ptr++ & 0x7f) << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = (*ptr++ & 0x1f) << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = (*ptr++ & 7) << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = (*ptr++ & 3) << 15; + values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = (*ptr++ & 1) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits18: reads the 18 bytes at ptr as eight 18-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 9 bytes (4 fields). */ +static inline void unpackBits18(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 10; + values[0] |= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = (*ptr++ & 0x3f) << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = (*ptr++ & 3) << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = (*ptr++ & 0x3f) << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 14; + values[6] |= 
*ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = (*ptr++ & 3) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits19: reads the 19 bytes at ptr as eight 19-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits19(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = (*ptr++ & 0x1f) << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = (*ptr++ & 3) << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = (*ptr++ & 0x7f) << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 15; + values[4] |= *ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = (*ptr++ & 1) << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = (*ptr++ & 7) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits20: reads the 20 bytes at ptr as eight 20-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 5 bytes (2 fields). */ +static inline void unpackBits20(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = (*ptr++ & 0xf) << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = (*ptr++ & 0xf) << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 12; + values[4] |= *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = (*ptr++ & 0xf) << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = (*ptr++ & 0xf) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits21: reads the 21 bytes at ptr as eight 21-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits21(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 13; + values[0] |= *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = (*ptr++ & 7) << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + 
values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = (*ptr++ & 1) << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 17; + values[4] |= *ptr++ << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = (*ptr++ & 0x7f) << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = (*ptr++ & 3) << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = (*ptr++ & 0x1f) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits22: reads the 22 bytes at ptr as eight 22-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 11 bytes (4 fields). */ +static inline void unpackBits22(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = (*ptr++ & 3) << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = (*ptr++ & 0x3f) << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = (*ptr++ & 3) << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = (*ptr++ & 0x3f) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits23: reads the 23 bytes at ptr as eight 23-bit fields packed most-significant-bit first and stores them in values[0..7]. */ +static inline void unpackBits23(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = (*ptr++ & 1) << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = (*ptr++ & 3) << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = (*ptr++ & 7) << 20; + 
values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = (*ptr++ & 0x1f) << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = (*ptr++ & 0x7f) << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits24: reads the 24 bytes at ptr as eight 3-byte fields, high byte first, and stores them in values[0..7]. */ +static inline void unpackBits24(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 16; + values[0] |= *ptr++ << 8; + values[0] |= *ptr++; + values[1] = *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + values[2] = *ptr++ << 16; + values[2] |= *ptr++ << 8; + values[2] |= *ptr++; + values[3] = *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + values[4] = *ptr++ << 16; + values[4] |= *ptr++ << 8; + values[4] |= *ptr++; + values[5] = *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + values[6] = *ptr++ << 16; + values[6] |= *ptr++ << 8; + values[6] |= *ptr++; + values[7] = *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits25: reads the 25 bytes at ptr as eight 25-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before the shift by 24) - confirm against the real header. */ +static inline void unpackBits25(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 17; + values[0] |= *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = (*ptr++ & 0x7f) << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 19; + values[2] |= *ptr++ << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = (*ptr++ & 0x1f) << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 21; + values[4] |= *ptr++ << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = (*ptr++ & 7) << 22; + values[5] |= *ptr++ << 14; + values[5] |= 
*ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = (*ptr++ & 3) << 23; + values[6] |= *ptr++ << 15; + values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = static_cast(*ptr++ & 1) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits26: reads the 26 bytes at ptr as eight 26-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 13 bytes (4 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before the shift by 24) - confirm against the real header. */ +static inline void unpackBits26(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 18; + values[0] |= *ptr++ << 10; + values[0] |= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = (*ptr++ & 0x3f) << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = (*ptr++ & 0xf) << 22; + values[2] |= *ptr++ << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = static_cast(*ptr++ & 3) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 18; + values[4] |= *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = (*ptr++ & 0x3f) << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = (*ptr++ & 0xf) << 22; + values[6] |= *ptr++ << 14; + values[6] |= *ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = static_cast(*ptr++ & 3) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits27: reads the 27 bytes at ptr as eight 27-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits27(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 19; + values[0] |= *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = (*ptr++ & 0x1f) << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 25; + values[2] |= *ptr++ << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = (*ptr++ & 0x7f) << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = (*ptr++ & 0xf) << 23; + values[4] |= *ptr++ << 15; + values[4] |= 
*ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = static_cast(*ptr++ & 1) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = (*ptr++ & 0x3f) << 21; + values[6] |= *ptr++ << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = static_cast(*ptr++ & 7) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits28: reads the 28 bytes at ptr as eight 28-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 7 bytes (2 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before the shift by 24) - confirm against the real header. */ +static inline void unpackBits28(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 20; + values[0] |= *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = static_cast(*ptr++ & 0xf) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = *ptr++ << 20; + values[2] |= *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = static_cast(*ptr++ & 0xf) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 20; + values[4] |= *ptr++ << 12; + values[4] |= *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = static_cast(*ptr++ & 0xf) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = *ptr++ << 20; + values[6] |= *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = static_cast(*ptr++ & 0xf) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits29: reads the 29 bytes at ptr as eight 29-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits29(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 21; + values[0] |= *ptr++ << 13; + values[0] |= *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = static_cast(*ptr++ & 7) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = (*ptr++ & 0x3f) << 23; + values[2] |= *ptr++ << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = static_cast(*ptr++ & 1) << 
28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 25; + values[4] |= *ptr++ << 17; + values[4] |= *ptr++ << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = (*ptr++ & 0x7f) << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 27; + values[6] |= *ptr++ << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = static_cast(*ptr++ & 0x1f) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits30: reads the 30 bytes at ptr as eight 30-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 15 bytes (4 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits30(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 22; + values[0] |= *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = static_cast(*ptr++ & 3) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 26; + values[2] |= *ptr++ << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = static_cast(*ptr++ & 0x3f) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = *ptr++ << 22; + values[4] |= *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = static_cast(*ptr++ & 3) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 26; + values[6] |= *ptr++ << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = static_cast(*ptr++ & 0x3f) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits31: reads the 31 bytes at ptr as eight 31-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits31(uint64_t* values, const uint8_t* ptr) { + values[0] = *ptr++ << 23; + 
values[0] |= *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = static_cast(*ptr++ & 1) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 29; + values[2] |= *ptr++ << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = static_cast(*ptr++ & 7) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 27; + values[4] |= *ptr++ << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = static_cast(*ptr++ & 0x1f) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 25; + values[6] |= *ptr++ << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = static_cast(*ptr++ & 0x7f) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits32: reads the 32 bytes at ptr as eight 4-byte fields, high byte first, and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the top byte before the shift by 24) - confirm against the real header. */ +static inline void unpackBits32(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 24; + values[0] |= *ptr++ << 16; + values[0] |= *ptr++ << 8; + values[0] |= *ptr++; + values[1] = static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + values[2] = static_cast(*ptr++) << 24; + values[2] |= *ptr++ << 16; + values[2] |= *ptr++ << 8; + values[2] |= *ptr++; + values[3] = static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + values[4] = static_cast(*ptr++) << 24; + values[4] |= *ptr++ << 16; + values[4] |= *ptr++ << 8; + values[4] |= *ptr++; + values[5] = static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + values[6] = 
static_cast(*ptr++) << 24; + values[6] |= *ptr++ << 16; + values[6] |= *ptr++ << 8; + values[6] |= *ptr++; + values[7] = static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits33: reads the 33 bytes at ptr as eight 33-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits33(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 25; + values[0] |= *ptr++ << 17; + values[0] |= *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = static_cast(*ptr++ & 0x7f) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 27; + values[2] |= *ptr++ << 19; + values[2] |= *ptr++ << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = static_cast(*ptr++ & 0x1f) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 29; + values[4] |= *ptr++ << 21; + values[4] |= *ptr++ << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = static_cast(*ptr++ & 7) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 31; + values[6] |= *ptr++ << 23; + values[6] |= *ptr++ << 15; + values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = static_cast(*ptr++ & 1) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits34: reads the 34 bytes at ptr as eight 34-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 17 bytes (4 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits34(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 26; + values[0] |= *ptr++ << 18; + values[0] |= *ptr++ << 10; + values[0] |= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = static_cast(*ptr++ & 0x3f) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = 
static_cast(*ptr++ & 0xf) << 30; + values[2] |= *ptr++ << 22; + values[2] |= *ptr++ << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = static_cast(*ptr++ & 3) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 26; + values[4] |= *ptr++ << 18; + values[4] |= *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = static_cast(*ptr++ & 0x3f) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 30; + values[6] |= *ptr++ << 22; + values[6] |= *ptr++ << 14; + values[6] |= *ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = static_cast(*ptr++ & 3) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr++;/* NOTE(review): this trailing ++ has no observable effect (ptr is a by-value parameter and is not read again); the next function on this line ends with a plain *ptr instead. */ +} +/* unpackBits35: reads the 35 bytes at ptr as eight 35-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits35(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 27; + values[0] |= *ptr++ << 19; + values[0] |= *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = static_cast(*ptr++ & 0x1f) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 33; + values[2] |= static_cast(*ptr++) << 25; + values[2] |= *ptr++ << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = static_cast(*ptr++ & 0x7f) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 31; + values[4] |= *ptr++ << 23; + values[4] |= *ptr++ << 15; + values[4] |= *ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = static_cast(*ptr++ & 1) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + 
values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 29; + values[6] |= *ptr++ << 21; + values[6] |= *ptr++ << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = static_cast(*ptr++ & 7) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits36: reads the 36 bytes at ptr as eight 36-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 9 bytes (2 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits36(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 28; + values[0] |= *ptr++ << 20; + values[0] |= *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = static_cast(*ptr++ & 0xf) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = static_cast(*ptr++) << 28; + values[2] |= *ptr++ << 20; + values[2] |= *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = static_cast(*ptr++ & 0xf) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 28; + values[4] |= *ptr++ << 20; + values[4] |= *ptr++ << 12; + values[4] |= *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = static_cast(*ptr++ & 0xf) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = static_cast(*ptr++) << 28; + values[6] |= *ptr++ << 20; + values[6] |= *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = static_cast(*ptr++ & 0xf) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits37: reads the 37 bytes at ptr as eight 37-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before the larger shifts) - confirm against the real header. */ +static inline void unpackBits37(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 29; + values[0] |= *ptr++ << 21; + values[0] |= *ptr++ << 13; + values[0] |= *ptr++ << 5; + 
values[0] |= *ptr >> 3; + + values[1] = static_cast(*ptr++ & 7) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 31; + values[2] |= static_cast(*ptr++) << 23; + values[2] |= *ptr++ << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = static_cast(*ptr++ & 1) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 33; + values[4] |= static_cast(*ptr++) << 25; + values[4] |= *ptr++ << 17; + values[4] |= *ptr++ << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = static_cast(*ptr++ & 0x7f) << 30; + values[5] |= static_cast(*ptr++) << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 35; + values[6] |= static_cast(*ptr++) << 27; + values[6] |= *ptr++ << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = static_cast(*ptr++ & 0x1f) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits38: reads the 38 bytes at ptr as eight 38-bit fields packed most-significant-bit first and stores them in values[0..7]; the layout repeats every 19 bytes (4 fields). NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits38(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 30; + values[0] |= *ptr++ << 22; + values[0] |= *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = static_cast(*ptr++ & 3) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 34; + values[2] |= static_cast(*ptr++) << 26; + values[2] |= *ptr++ << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = 
static_cast(*ptr++ & 0x3f) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 30; + values[4] |= *ptr++ << 22; + values[4] |= *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = static_cast(*ptr++ & 3) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 34; + values[6] |= static_cast(*ptr++) << 26; + values[6] |= *ptr++ << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = static_cast(*ptr++ & 0x3f) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits39: reads the 39 bytes at ptr as eight 39-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits39(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 31; + values[0] |= *ptr++ << 23; + values[0] |= *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = static_cast(*ptr++ & 1) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 37; + values[2] |= static_cast(*ptr++) << 29; + values[2] |= *ptr++ << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = static_cast(*ptr++ & 7) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 35; + values[4] |= static_cast(*ptr++) << 27; + values[4] |= *ptr++ << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = static_cast(*ptr++ & 0x1f) << 34; + values[5] |= 
static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 33; + values[6] |= static_cast(*ptr++) << 25; + values[6] |= *ptr++ << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = static_cast(*ptr++ & 0x7f) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits40: reads the 40 bytes at ptr as eight 5-byte fields, high byte first, and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before the shifts by 32 and 24) - confirm against the real header. */ +static inline void unpackBits40(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 32; + values[0] |= static_cast(*ptr++) << 24; + values[0] |= *ptr++ << 16; + values[0] |= *ptr++ << 8; + values[0] |= *ptr++; + values[1] = static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + values[2] = static_cast(*ptr++) << 32; + values[2] |= static_cast(*ptr++) << 24; + values[2] |= *ptr++ << 16; + values[2] |= *ptr++ << 8; + values[2] |= *ptr++; + values[3] = static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + values[4] = static_cast(*ptr++) << 32; + values[4] |= static_cast(*ptr++) << 24; + values[4] |= *ptr++ << 16; + values[4] |= *ptr++ << 8; + values[4] |= *ptr++; + values[5] = static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + values[6] = static_cast(*ptr++) << 32; + values[6] |= static_cast(*ptr++) << 24; + values[6] |= *ptr++ << 16; + values[6] |= *ptr++ << 8; + values[6] |= *ptr++; + values[7] = static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} +/* unpackBits41: reads the 41 bytes at ptr as eight 41-bit fields packed most-significant-bit first and stores them in values[0..7]. NOTE(review): the static_cast calls carry no target type in this text (presumably uint64_t, widening the byte before shifts of 24 or more) - confirm against the real header. */ +static inline void unpackBits41(uint64_t* values, const uint8_t* ptr) { + values[0] = 
static_cast(*ptr++) << 33; + values[0] |= static_cast(*ptr++) << 25; + values[0] |= *ptr++ << 17; + values[0] |= *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = static_cast(*ptr++ & 0x7f) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 35; + values[2] |= static_cast(*ptr++) << 27; + values[2] |= *ptr++ << 19; + values[2] |= *ptr++ << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = static_cast(*ptr++ & 0x1f) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 37; + values[4] |= static_cast(*ptr++) << 29; + values[4] |= *ptr++ << 21; + values[4] |= *ptr++ << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = static_cast(*ptr++ & 7) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 39; + values[6] |= static_cast(*ptr++) << 31; + values[6] |= *ptr++ << 23; + values[6] |= *ptr++ << 15; + values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = static_cast(*ptr++ & 1) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits42(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 34; + values[0] |= static_cast(*ptr++) << 26; + values[0] |= *ptr++ << 18; + values[0] |= *ptr++ << 10; + values[0] |= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = static_cast(*ptr++ & 0x3f) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; 
+ values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 38; + values[2] |= static_cast(*ptr++) << 30; + values[2] |= *ptr++ << 22; + values[2] |= *ptr++ << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = static_cast(*ptr++ & 3) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 34; + values[4] |= static_cast(*ptr++) << 26; + values[4] |= *ptr++ << 18; + values[4] |= *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = static_cast(*ptr++ & 0x3f) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 38; + values[6] |= static_cast(*ptr++) << 30; + values[6] |= *ptr++ << 22; + values[6] |= *ptr++ << 14; + values[6] |= *ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = static_cast(*ptr++ & 3) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits43(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 35; + values[0] |= static_cast(*ptr++) << 27; + values[0] |= *ptr++ << 19; + values[0] |= *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = static_cast(*ptr++ & 0x1f) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 41; + values[2] |= static_cast(*ptr++) << 33; + values[2] |= static_cast(*ptr++) << 25; + values[2] |= *ptr++ << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 
1; + values[2] |= *ptr >> 7; + + values[3] = static_cast(*ptr++ & 0x7f) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 39; + values[4] |= static_cast(*ptr++) << 31; + values[4] |= *ptr++ << 23; + values[4] |= *ptr++ << 15; + values[4] |= *ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = static_cast(*ptr++ & 1) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 37; + values[6] |= static_cast(*ptr++) << 29; + values[6] |= *ptr++ << 21; + values[6] |= *ptr++ << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = static_cast(*ptr++ & 7) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits44(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 36; + values[0] |= static_cast(*ptr++) << 28; + values[0] |= *ptr++ << 20; + values[0] |= *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = static_cast(*ptr++ & 0xf) << 40; + values[1] |= static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = static_cast(*ptr++) << 36; + values[2] |= static_cast(*ptr++) << 28; + values[2] |= *ptr++ << 20; + values[2] |= *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = static_cast(*ptr++ & 0xf) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + 
values[4] = static_cast(*ptr++) << 36; + values[4] |= static_cast(*ptr++) << 28; + values[4] |= *ptr++ << 20; + values[4] |= *ptr++ << 12; + values[4] |= *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = static_cast(*ptr++ & 0xf) << 40; + values[5] |= static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = static_cast(*ptr++) << 36; + values[6] |= static_cast(*ptr++) << 28; + values[6] |= *ptr++ << 20; + values[6] |= *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = static_cast(*ptr++ & 0xf) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits45(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 37; + values[0] |= static_cast(*ptr++) << 29; + values[0] |= *ptr++ << 21; + values[0] |= *ptr++ << 13; + values[0] |= *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = static_cast(*ptr++ & 7) << 42; + values[1] |= static_cast(*ptr++) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 39; + values[2] |= static_cast(*ptr++) << 31; + values[2] |= static_cast(*ptr++) << 23; + values[2] |= *ptr++ << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = static_cast(*ptr++ & 1) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 41; + values[4] |= static_cast(*ptr++) << 33; + values[4] |= static_cast(*ptr++) << 25; + values[4] |= *ptr++ << 17; + values[4] |= *ptr++ << 9; + values[4] |= 
*ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = static_cast(*ptr++ & 0x7f) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= static_cast(*ptr++) << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 43; + values[6] |= static_cast(*ptr++) << 35; + values[6] |= static_cast(*ptr++) << 27; + values[6] |= *ptr++ << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = static_cast(*ptr++ & 0x1f) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits46(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 38; + values[0] |= static_cast(*ptr++) << 30; + values[0] |= *ptr++ << 22; + values[0] |= *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = static_cast(*ptr++ & 3) << 44; + values[1] |= static_cast(*ptr++) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 42; + values[2] |= static_cast(*ptr++) << 34; + values[2] |= static_cast(*ptr++) << 26; + values[2] |= *ptr++ << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = static_cast(*ptr++ & 0x3f) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 38; + values[4] |= static_cast(*ptr++) << 30; + values[4] |= *ptr++ << 22; + values[4] |= *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = static_cast(*ptr++ & 3) << 44; + values[5] |= static_cast(*ptr++) << 36; + values[5] |= 
static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 42; + values[6] |= static_cast(*ptr++) << 34; + values[6] |= static_cast(*ptr++) << 26; + values[6] |= *ptr++ << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = static_cast(*ptr++ & 0x3f) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits47(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 39; + values[0] |= static_cast(*ptr++) << 31; + values[0] |= *ptr++ << 23; + values[0] |= *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = static_cast(*ptr++ & 1) << 46; + values[1] |= static_cast(*ptr++) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 45; + values[2] |= static_cast(*ptr++) << 37; + values[2] |= static_cast(*ptr++) << 29; + values[2] |= *ptr++ << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = static_cast(*ptr++ & 7) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 43; + values[4] |= static_cast(*ptr++) << 35; + values[4] |= static_cast(*ptr++) << 27; + values[4] |= *ptr++ << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = static_cast(*ptr++ & 0x1f) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ 
<< 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 41; + values[6] |= static_cast(*ptr++) << 33; + values[6] |= static_cast(*ptr++) << 25; + values[6] |= *ptr++ << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = static_cast(*ptr++ & 0x7f) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits48(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 40; + values[0] |= static_cast(*ptr++) << 32; + values[0] |= static_cast(*ptr++) << 24; + values[0] |= *ptr++ << 16; + values[0] |= *ptr++ << 8; + values[0] |= *ptr++; + values[1] = static_cast(*ptr++) << 40; + values[1] |= static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + values[2] = static_cast(*ptr++) << 40; + values[2] |= static_cast(*ptr++) << 32; + values[2] |= static_cast(*ptr++) << 24; + values[2] |= *ptr++ << 16; + values[2] |= *ptr++ << 8; + values[2] |= *ptr++; + values[3] = static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + values[4] = static_cast(*ptr++) << 40; + values[4] |= static_cast(*ptr++) << 32; + values[4] |= static_cast(*ptr++) << 24; + values[4] |= *ptr++ << 16; + values[4] |= *ptr++ << 8; + values[4] |= *ptr++; + values[5] = static_cast(*ptr++) << 40; + values[5] |= static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + values[6] = static_cast(*ptr++) << 40; + values[6] |= static_cast(*ptr++) << 32; + values[6] |= static_cast(*ptr++) << 24; + values[6] |= 
*ptr++ << 16; + values[6] |= *ptr++ << 8; + values[6] |= *ptr++; + values[7] = static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits49(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 41; + values[0] |= static_cast(*ptr++) << 33; + values[0] |= static_cast(*ptr++) << 25; + values[0] |= *ptr++ << 17; + values[0] |= *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = static_cast(*ptr++ & 0x7f) << 42; + values[1] |= static_cast(*ptr++) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 43; + values[2] |= static_cast(*ptr++) << 35; + values[2] |= static_cast(*ptr++) << 27; + values[2] |= *ptr++ << 19; + values[2] |= *ptr++ << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = static_cast(*ptr++ & 0x1f) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 45; + values[4] |= static_cast(*ptr++) << 37; + values[4] |= static_cast(*ptr++) << 29; + values[4] |= *ptr++ << 21; + values[4] |= *ptr++ << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = static_cast(*ptr++ & 7) << 46; + values[5] |= static_cast(*ptr++) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 47; + values[6] |= static_cast(*ptr++) << 39; + values[6] |= static_cast(*ptr++) << 31; + values[6] |= *ptr++ << 23; + values[6] |= *ptr++ << 15; + 
values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = static_cast(*ptr++ & 1) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits50(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 42; + values[0] |= static_cast(*ptr++) << 34; + values[0] |= static_cast(*ptr++) << 26; + values[0] |= *ptr++ << 18; + values[0] |= *ptr++ << 10; + values[0] |= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = static_cast(*ptr++ & 0x3f) << 44; + values[1] |= static_cast(*ptr++) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 46; + values[2] |= static_cast(*ptr++) << 38; + values[2] |= static_cast(*ptr++) << 30; + values[2] |= *ptr++ << 22; + values[2] |= *ptr++ << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = static_cast(*ptr++ & 3) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 42; + values[4] |= static_cast(*ptr++) << 34; + values[4] |= static_cast(*ptr++) << 26; + values[4] |= *ptr++ << 18; + values[4] |= *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = static_cast(*ptr++ & 0x3f) << 44; + values[5] |= static_cast(*ptr++) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 46; + values[6] |= static_cast(*ptr++) << 38; + values[6] |= static_cast(*ptr++) << 30; + values[6] |= 
*ptr++ << 22; + values[6] |= *ptr++ << 14; + values[6] |= *ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = static_cast(*ptr++ & 3) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits51(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 43; + values[0] |= static_cast(*ptr++) << 35; + values[0] |= static_cast(*ptr++) << 27; + values[0] |= *ptr++ << 19; + values[0] |= *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = static_cast(*ptr++ & 0x1f) << 46; + values[1] |= static_cast(*ptr++) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 49; + values[2] |= static_cast(*ptr++) << 41; + values[2] |= static_cast(*ptr++) << 33; + values[2] |= static_cast(*ptr++) << 25; + values[2] |= *ptr++ << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = static_cast(*ptr++ & 0x7f) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 47; + values[4] |= static_cast(*ptr++) << 39; + values[4] |= static_cast(*ptr++) << 31; + values[4] |= *ptr++ << 23; + values[4] |= *ptr++ << 15; + values[4] |= *ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = static_cast(*ptr++ & 1) << 50; + values[5] |= static_cast(*ptr++) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = 
static_cast(*ptr++ & 0x3f) << 45; + values[6] |= static_cast(*ptr++) << 37; + values[6] |= static_cast(*ptr++) << 29; + values[6] |= *ptr++ << 21; + values[6] |= *ptr++ << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = static_cast(*ptr++ & 7) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits52(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 44; + values[0] |= static_cast(*ptr++) << 36; + values[0] |= static_cast(*ptr++) << 28; + values[0] |= *ptr++ << 20; + values[0] |= *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = static_cast(*ptr++ & 0xf) << 48; + values[1] |= static_cast(*ptr++) << 40; + values[1] |= static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = static_cast(*ptr++) << 44; + values[2] |= static_cast(*ptr++) << 36; + values[2] |= static_cast(*ptr++) << 28; + values[2] |= *ptr++ << 20; + values[2] |= *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = static_cast(*ptr++ & 0xf) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 44; + values[4] |= static_cast(*ptr++) << 36; + values[4] |= static_cast(*ptr++) << 28; + values[4] |= *ptr++ << 20; + values[4] |= *ptr++ << 12; + values[4] |= *ptr++ << 4; + values[4] |= *ptr >> 4; + + values[5] = static_cast(*ptr++ & 0xf) << 48; + values[5] |= static_cast(*ptr++) << 40; + values[5] |= static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] 
|= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = static_cast(*ptr++) << 44; + values[6] |= static_cast(*ptr++) << 36; + values[6] |= static_cast(*ptr++) << 28; + values[6] |= *ptr++ << 20; + values[6] |= *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = static_cast(*ptr++ & 0xf) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits53(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 45; + values[0] |= static_cast(*ptr++) << 37; + values[0] |= static_cast(*ptr++) << 29; + values[0] |= *ptr++ << 21; + values[0] |= *ptr++ << 13; + values[0] |= *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = static_cast(*ptr++ & 7) << 50; + values[1] |= static_cast(*ptr++) << 42; + values[1] |= static_cast(*ptr++) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 47; + values[2] |= static_cast(*ptr++) << 39; + values[2] |= static_cast(*ptr++) << 31; + values[2] |= static_cast(*ptr++) << 23; + values[2] |= *ptr++ << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = static_cast(*ptr++ & 1) << 52; + values[3] |= static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 49; + values[4] |= static_cast(*ptr++) << 41; + values[4] |= static_cast(*ptr++) << 33; + values[4] |= static_cast(*ptr++) << 25; + values[4] |= *ptr++ << 17; + values[4] |= *ptr++ << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = static_cast(*ptr++ & 
0x7f) << 46; + values[5] |= static_cast(*ptr++) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 51; + values[6] |= static_cast(*ptr++) << 43; + values[6] |= static_cast(*ptr++) << 35; + values[6] |= static_cast(*ptr++) << 27; + values[6] |= *ptr++ << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = static_cast(*ptr++ & 0x1f) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits54(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 46; + values[0] |= static_cast(*ptr++) << 38; + values[0] |= static_cast(*ptr++) << 30; + values[0] |= *ptr++ << 22; + values[0] |= *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = static_cast(*ptr++ & 3) << 52; + values[1] |= static_cast(*ptr++) << 44; + values[1] |= static_cast(*ptr++) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 50; + values[2] |= static_cast(*ptr++) << 42; + values[2] |= static_cast(*ptr++) << 34; + values[2] |= static_cast(*ptr++) << 26; + values[2] |= *ptr++ << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = static_cast(*ptr++ & 0x3f) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 46; + values[4] |= static_cast(*ptr++) << 38; + values[4] |= 
static_cast(*ptr++) << 30; + values[4] |= *ptr++ << 22; + values[4] |= *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = static_cast(*ptr++ & 3) << 52; + values[5] |= static_cast(*ptr++) << 44; + values[5] |= static_cast(*ptr++) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 50; + values[6] |= static_cast(*ptr++) << 42; + values[6] |= static_cast(*ptr++) << 34; + values[6] |= static_cast(*ptr++) << 26; + values[6] |= *ptr++ << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = static_cast(*ptr++ & 0x3f) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr++; +} + +static inline void unpackBits55(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 47; + values[0] |= static_cast(*ptr++) << 39; + values[0] |= static_cast(*ptr++) << 31; + values[0] |= *ptr++ << 23; + values[0] |= *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = static_cast(*ptr++ & 1) << 54; + values[1] |= static_cast(*ptr++) << 46; + values[1] |= static_cast(*ptr++) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 53; + values[2] |= static_cast(*ptr++) << 45; + values[2] |= static_cast(*ptr++) << 37; + values[2] |= static_cast(*ptr++) << 29; + values[2] |= *ptr++ << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = static_cast(*ptr++ & 7) << 52; + values[3] |= static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= 
static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 51; + values[4] |= static_cast(*ptr++) << 43; + values[4] |= static_cast(*ptr++) << 35; + values[4] |= static_cast(*ptr++) << 27; + values[4] |= *ptr++ << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = static_cast(*ptr++ & 0x1f) << 50; + values[5] |= static_cast(*ptr++) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 49; + values[6] |= static_cast(*ptr++) << 41; + values[6] |= static_cast(*ptr++) << 33; + values[6] |= static_cast(*ptr++) << 25; + values[6] |= *ptr++ << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = static_cast(*ptr++ & 0x7f) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits56(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 48; + values[0] |= static_cast(*ptr++) << 40; + values[0] |= static_cast(*ptr++) << 32; + values[0] |= static_cast(*ptr++) << 24; + values[0] |= *ptr++ << 16; + values[0] |= *ptr++ << 8; + values[0] |= *ptr++; + values[1] = static_cast(*ptr++) << 48; + values[1] |= static_cast(*ptr++) << 40; + values[1] |= static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + values[2] = static_cast(*ptr++) << 48; + values[2] |= static_cast(*ptr++) << 40; + values[2] |= static_cast(*ptr++) << 32; + values[2] |= static_cast(*ptr++) << 24; + 
values[2] |= *ptr++ << 16; + values[2] |= *ptr++ << 8; + values[2] |= *ptr++; + values[3] = static_cast(*ptr++) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + values[4] = static_cast(*ptr++) << 48; + values[4] |= static_cast(*ptr++) << 40; + values[4] |= static_cast(*ptr++) << 32; + values[4] |= static_cast(*ptr++) << 24; + values[4] |= *ptr++ << 16; + values[4] |= *ptr++ << 8; + values[4] |= *ptr++; + values[5] = static_cast(*ptr++) << 48; + values[5] |= static_cast(*ptr++) << 40; + values[5] |= static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + values[6] = static_cast(*ptr++) << 48; + values[6] |= static_cast(*ptr++) << 40; + values[6] |= static_cast(*ptr++) << 32; + values[6] |= static_cast(*ptr++) << 24; + values[6] |= *ptr++ << 16; + values[6] |= *ptr++ << 8; + values[6] |= *ptr++; + values[7] = static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits57(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 49; + values[0] |= static_cast(*ptr++) << 41; + values[0] |= static_cast(*ptr++) << 33; + values[0] |= static_cast(*ptr++) << 25; + values[0] |= *ptr++ << 17; + values[0] |= *ptr++ << 9; + values[0] |= *ptr++ << 1; + values[0] |= *ptr >> 7; + + values[1] = static_cast(*ptr++ & 0x7f) << 50; + values[1] |= static_cast(*ptr++) << 42; + values[1] |= static_cast(*ptr++) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) 
<< 51; + values[2] |= static_cast(*ptr++) << 43; + values[2] |= static_cast(*ptr++) << 35; + values[2] |= static_cast(*ptr++) << 27; + values[2] |= *ptr++ << 19; + values[2] |= *ptr++ << 11; + values[2] |= *ptr++ << 3; + values[2] |= *ptr >> 5; + + values[3] = static_cast(*ptr++ & 0x1f) << 52; + values[3] |= static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 53; + values[4] |= static_cast(*ptr++) << 45; + values[4] |= static_cast(*ptr++) << 37; + values[4] |= static_cast(*ptr++) << 29; + values[4] |= *ptr++ << 21; + values[4] |= *ptr++ << 13; + values[4] |= *ptr++ << 5; + values[4] |= *ptr >> 3; + + values[5] = static_cast(*ptr++ & 7) << 54; + values[5] |= static_cast(*ptr++) << 46; + values[5] |= static_cast(*ptr++) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 55; + values[6] |= static_cast(*ptr++) << 47; + values[6] |= static_cast(*ptr++) << 39; + values[6] |= static_cast(*ptr++) << 31; + values[6] |= *ptr++ << 23; + values[6] |= *ptr++ << 15; + values[6] |= *ptr++ << 7; + values[6] |= *ptr >> 1; + + values[7] = static_cast(*ptr++ & 1) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits58(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 50; + values[0] |= static_cast(*ptr++) << 42; + values[0] |= static_cast(*ptr++) << 34; + values[0] |= static_cast(*ptr++) << 26; + values[0] |= *ptr++ << 18; + values[0] |= *ptr++ << 10; + values[0] 
|= *ptr++ << 2; + values[0] |= *ptr >> 6; + + values[1] = static_cast(*ptr++ & 0x3f) << 52; + values[1] |= static_cast(*ptr++) << 44; + values[1] |= static_cast(*ptr++) << 36; + values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 54; + values[2] |= static_cast(*ptr++) << 46; + values[2] |= static_cast(*ptr++) << 38; + values[2] |= static_cast(*ptr++) << 30; + values[2] |= *ptr++ << 22; + values[2] |= *ptr++ << 14; + values[2] |= *ptr++ << 6; + values[2] |= *ptr >> 2; + + values[3] = static_cast(*ptr++ & 3) << 56; + values[3] |= static_cast(*ptr++) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 50; + values[4] |= static_cast(*ptr++) << 42; + values[4] |= static_cast(*ptr++) << 34; + values[4] |= static_cast(*ptr++) << 26; + values[4] |= *ptr++ << 18; + values[4] |= *ptr++ << 10; + values[4] |= *ptr++ << 2; + values[4] |= *ptr >> 6; + + values[5] = static_cast(*ptr++ & 0x3f) << 52; + values[5] |= static_cast(*ptr++) << 44; + values[5] |= static_cast(*ptr++) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 54; + values[6] |= static_cast(*ptr++) << 46; + values[6] |= static_cast(*ptr++) << 38; + values[6] |= static_cast(*ptr++) << 30; + values[6] |= *ptr++ << 22; + values[6] |= *ptr++ << 14; + values[6] |= *ptr++ << 6; + values[6] |= *ptr >> 2; + + values[7] = static_cast(*ptr++ & 3) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= 
*ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr++; +} + +static inline void unpackBits59(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 51; + values[0] |= static_cast(*ptr++) << 43; + values[0] |= static_cast(*ptr++) << 35; + values[0] |= static_cast(*ptr++) << 27; + values[0] |= *ptr++ << 19; + values[0] |= *ptr++ << 11; + values[0] |= *ptr++ << 3; + values[0] |= *ptr >> 5; + + values[1] = static_cast(*ptr++ & 0x1f) << 54; + values[1] |= static_cast(*ptr++) << 46; + values[1] |= static_cast(*ptr++) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 57; + values[2] |= static_cast(*ptr++) << 49; + values[2] |= static_cast(*ptr++) << 41; + values[2] |= static_cast(*ptr++) << 33; + values[2] |= static_cast(*ptr++) << 25; + values[2] |= *ptr++ << 17; + values[2] |= *ptr++ << 9; + values[2] |= *ptr++ << 1; + values[2] |= *ptr >> 7; + + values[3] = static_cast(*ptr++ & 0x7f) << 52; + values[3] |= static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 55; + values[4] |= static_cast(*ptr++) << 47; + values[4] |= static_cast(*ptr++) << 39; + values[4] |= static_cast(*ptr++) << 31; + values[4] |= *ptr++ << 23; + values[4] |= *ptr++ << 15; + values[4] |= *ptr++ << 7; + values[4] |= *ptr >> 1; + + values[5] = static_cast(*ptr++ & 1) << 58; + values[5] |= static_cast(*ptr++) << 50; + values[5] |= static_cast(*ptr++) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 53; + values[6] 
|= static_cast(*ptr++) << 45; + values[6] |= static_cast(*ptr++) << 37; + values[6] |= static_cast(*ptr++) << 29; + values[6] |= *ptr++ << 21; + values[6] |= *ptr++ << 13; + values[6] |= *ptr++ << 5; + values[6] |= *ptr >> 3; + + values[7] = static_cast(*ptr++ & 7) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits60(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 52; + values[0] |= static_cast(*ptr++) << 44; + values[0] |= static_cast(*ptr++) << 36; + values[0] |= static_cast(*ptr++) << 28; + values[0] |= *ptr++ << 20; + values[0] |= *ptr++ << 12; + values[0] |= *ptr++ << 4; + values[0] |= *ptr >> 4; + + values[1] = static_cast(*ptr++ & 0xf) << 56; + values[1] |= static_cast(*ptr++) << 48; + values[1] |= static_cast(*ptr++) << 40; + values[1] |= static_cast(*ptr++) << 32; + values[1] |= static_cast(*ptr++) << 24; + values[1] |= *ptr++ << 16; + values[1] |= *ptr++ << 8; + values[1] |= *ptr++; + + values[2] = static_cast(*ptr++) << 52; + values[2] |= static_cast(*ptr++) << 44; + values[2] |= static_cast(*ptr++) << 36; + values[2] |= static_cast(*ptr++) << 28; + values[2] |= *ptr++ << 20; + values[2] |= *ptr++ << 12; + values[2] |= *ptr++ << 4; + values[2] |= *ptr >> 4; + + values[3] = static_cast(*ptr++ & 0xf) << 56; + values[3] |= static_cast(*ptr++) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 52; + values[4] |= static_cast(*ptr++) << 44; + values[4] |= static_cast(*ptr++) << 36; + values[4] |= static_cast(*ptr++) << 28; + values[4] |= *ptr++ << 20; + values[4] |= *ptr++ << 12; + values[4] |= *ptr++ 
<< 4; + values[4] |= *ptr >> 4; + + values[5] = static_cast(*ptr++ & 0xf) << 56; + values[5] |= static_cast(*ptr++) << 48; + values[5] |= static_cast(*ptr++) << 40; + values[5] |= static_cast(*ptr++) << 32; + values[5] |= static_cast(*ptr++) << 24; + values[5] |= *ptr++ << 16; + values[5] |= *ptr++ << 8; + values[5] |= *ptr++; + + values[6] = static_cast(*ptr++) << 52; + values[6] |= static_cast(*ptr++) << 44; + values[6] |= static_cast(*ptr++) << 36; + values[6] |= static_cast(*ptr++) << 28; + values[6] |= *ptr++ << 20; + values[6] |= *ptr++ << 12; + values[6] |= *ptr++ << 4; + values[6] |= *ptr >> 4; + + values[7] = static_cast(*ptr++ & 0xf) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits61(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 53; + values[0] |= static_cast(*ptr++) << 45; + values[0] |= static_cast(*ptr++) << 37; + values[0] |= static_cast(*ptr++) << 29; + values[0] |= *ptr++ << 21; + values[0] |= *ptr++ << 13; + values[0] |= *ptr++ << 5; + values[0] |= *ptr >> 3; + + values[1] = static_cast(*ptr++ & 7) << 58; + values[1] |= static_cast(*ptr++) << 50; + values[1] |= static_cast(*ptr++) << 42; + values[1] |= static_cast(*ptr++) << 34; + values[1] |= static_cast(*ptr++) << 26; + values[1] |= *ptr++ << 18; + values[1] |= *ptr++ << 10; + values[1] |= *ptr++ << 2; + values[1] |= *ptr >> 6; + + values[2] = static_cast(*ptr++ & 0x3f) << 55; + values[2] |= static_cast(*ptr++) << 47; + values[2] |= static_cast(*ptr++) << 39; + values[2] |= static_cast(*ptr++) << 31; + values[2] |= *ptr++ << 23; + values[2] |= *ptr++ << 15; + values[2] |= *ptr++ << 7; + values[2] |= *ptr >> 1; + + values[3] = static_cast(*ptr++ & 1) << 60; + values[3] |= static_cast(*ptr++) << 52; + values[3] |= 
static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 57; + values[4] |= static_cast(*ptr++) << 49; + values[4] |= static_cast(*ptr++) << 41; + values[4] |= static_cast(*ptr++) << 33; + values[4] |= static_cast(*ptr++) << 25; + values[4] |= *ptr++ << 17; + values[4] |= *ptr++ << 9; + values[4] |= *ptr++ << 1; + values[4] |= *ptr >> 7; + + values[5] = static_cast(*ptr++ & 0x7f) << 54; + values[5] |= static_cast(*ptr++) << 46; + values[5] |= static_cast(*ptr++) << 38; + values[5] |= static_cast(*ptr++) << 30; + values[5] |= *ptr++ << 22; + values[5] |= *ptr++ << 14; + values[5] |= *ptr++ << 6; + values[5] |= *ptr >> 2; + + values[6] = static_cast(*ptr++ & 3) << 59; + values[6] |= static_cast(*ptr++) << 51; + values[6] |= static_cast(*ptr++) << 43; + values[6] |= static_cast(*ptr++) << 35; + values[6] |= static_cast(*ptr++) << 27; + values[6] |= *ptr++ << 19; + values[6] |= *ptr++ << 11; + values[6] |= *ptr++ << 3; + values[6] |= *ptr >> 5; + + values[7] = static_cast(*ptr++ & 0x1f) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void unpackBits62(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 54; + values[0] |= static_cast(*ptr++) << 46; + values[0] |= static_cast(*ptr++) << 38; + values[0] |= static_cast(*ptr++) << 30; + values[0] |= *ptr++ << 22; + values[0] |= *ptr++ << 14; + values[0] |= *ptr++ << 6; + values[0] |= *ptr >> 2; + + values[1] = static_cast(*ptr++ & 3) << 60; + values[1] |= static_cast(*ptr++) << 52; + values[1] |= static_cast(*ptr++) << 44; + values[1] |= static_cast(*ptr++) << 36; + 
values[1] |= static_cast(*ptr++) << 28; + values[1] |= *ptr++ << 20; + values[1] |= *ptr++ << 12; + values[1] |= *ptr++ << 4; + values[1] |= *ptr >> 4; + + values[2] = static_cast(*ptr++ & 0xf) << 58; + values[2] |= static_cast(*ptr++) << 50; + values[2] |= static_cast(*ptr++) << 42; + values[2] |= static_cast(*ptr++) << 34; + values[2] |= static_cast(*ptr++) << 26; + values[2] |= *ptr++ << 18; + values[2] |= *ptr++ << 10; + values[2] |= *ptr++ << 2; + values[2] |= *ptr >> 6; + + values[3] = static_cast(*ptr++ & 0x3f) << 56; + values[3] |= static_cast(*ptr++) << 48; + values[3] |= static_cast(*ptr++) << 40; + values[3] |= static_cast(*ptr++) << 32; + values[3] |= static_cast(*ptr++) << 24; + values[3] |= *ptr++ << 16; + values[3] |= *ptr++ << 8; + values[3] |= *ptr++; + + values[4] = static_cast(*ptr++) << 54; + values[4] |= static_cast(*ptr++) << 46; + values[4] |= static_cast(*ptr++) << 38; + values[4] |= static_cast(*ptr++) << 30; + values[4] |= *ptr++ << 22; + values[4] |= *ptr++ << 14; + values[4] |= *ptr++ << 6; + values[4] |= *ptr >> 2; + + values[5] = static_cast(*ptr++ & 3) << 60; + values[5] |= static_cast(*ptr++) << 52; + values[5] |= static_cast(*ptr++) << 44; + values[5] |= static_cast(*ptr++) << 36; + values[5] |= static_cast(*ptr++) << 28; + values[5] |= *ptr++ << 20; + values[5] |= *ptr++ << 12; + values[5] |= *ptr++ << 4; + values[5] |= *ptr >> 4; + + values[6] = static_cast(*ptr++ & 0xf) << 58; + values[6] |= static_cast(*ptr++) << 50; + values[6] |= static_cast(*ptr++) << 42; + values[6] |= static_cast(*ptr++) << 34; + values[6] |= static_cast(*ptr++) << 26; + values[6] |= *ptr++ << 18; + values[6] |= *ptr++ << 10; + values[6] |= *ptr++ << 2; + values[6] |= *ptr >> 6; + + values[7] = static_cast(*ptr++ & 0x3f) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + 
values[7] |= *ptr; +} + +static inline void unpackBits63(uint64_t* values, const uint8_t* ptr) { + values[0] = static_cast(*ptr++) << 55; + values[0] |= static_cast(*ptr++) << 47; + values[0] |= static_cast(*ptr++) << 39; + values[0] |= static_cast(*ptr++) << 31; + values[0] |= *ptr++ << 23; + values[0] |= *ptr++ << 15; + values[0] |= *ptr++ << 7; + values[0] |= *ptr >> 1; + + values[1] = static_cast(*ptr++ & 1) << 62; + values[1] |= static_cast(*ptr++) << 54; + values[1] |= static_cast(*ptr++) << 46; + values[1] |= static_cast(*ptr++) << 38; + values[1] |= static_cast(*ptr++) << 30; + values[1] |= *ptr++ << 22; + values[1] |= *ptr++ << 14; + values[1] |= *ptr++ << 6; + values[1] |= *ptr >> 2; + + values[2] = static_cast(*ptr++ & 3) << 61; + values[2] |= static_cast(*ptr++) << 53; + values[2] |= static_cast(*ptr++) << 45; + values[2] |= static_cast(*ptr++) << 37; + values[2] |= static_cast(*ptr++) << 29; + values[2] |= *ptr++ << 21; + values[2] |= *ptr++ << 13; + values[2] |= *ptr++ << 5; + values[2] |= *ptr >> 3; + + values[3] = static_cast(*ptr++ & 7) << 60; + values[3] |= static_cast(*ptr++) << 52; + values[3] |= static_cast(*ptr++) << 44; + values[3] |= static_cast(*ptr++) << 36; + values[3] |= static_cast(*ptr++) << 28; + values[3] |= *ptr++ << 20; + values[3] |= *ptr++ << 12; + values[3] |= *ptr++ << 4; + values[3] |= *ptr >> 4; + + values[4] = static_cast(*ptr++ & 0xf) << 59; + values[4] |= static_cast(*ptr++) << 51; + values[4] |= static_cast(*ptr++) << 43; + values[4] |= static_cast(*ptr++) << 35; + values[4] |= static_cast(*ptr++) << 27; + values[4] |= *ptr++ << 19; + values[4] |= *ptr++ << 11; + values[4] |= *ptr++ << 3; + values[4] |= *ptr >> 5; + + values[5] = static_cast(*ptr++ & 0x1f) << 58; + values[5] |= static_cast(*ptr++) << 50; + values[5] |= static_cast(*ptr++) << 42; + values[5] |= static_cast(*ptr++) << 34; + values[5] |= static_cast(*ptr++) << 26; + values[5] |= *ptr++ << 18; + values[5] |= *ptr++ << 10; + values[5] |= *ptr++ << 2; + 
values[5] |= *ptr >> 6; + + values[6] = static_cast(*ptr++ & 0x3f) << 57; + values[6] |= static_cast(*ptr++) << 49; + values[6] |= static_cast(*ptr++) << 41; + values[6] |= static_cast(*ptr++) << 33; + values[6] |= static_cast(*ptr++) << 25; + values[6] |= *ptr++ << 17; + values[6] |= *ptr++ << 9; + values[6] |= *ptr++ << 1; + values[6] |= *ptr >> 7; + + values[7] = static_cast(*ptr++ & 0x7f) << 56; + values[7] |= static_cast(*ptr++) << 48; + values[7] |= static_cast(*ptr++) << 40; + values[7] |= static_cast(*ptr++) << 32; + values[7] |= static_cast(*ptr++) << 24; + values[7] |= *ptr++ << 16; + values[7] |= *ptr++ << 8; + values[7] |= *ptr; +} + +static inline void +packBitsBlock8(const uint64_t* values, uint8_t* ptr, uint8_t bits) { + switch (bits) { + case 1: + packBits1(values, ptr); + break; + case 2: + packBits2(values, ptr); + break; + case 3: + packBits3(values, ptr); + break; + case 4: + packBits4(values, ptr); + break; + case 5: + packBits5(values, ptr); + break; + case 6: + packBits6(values, ptr); + break; + case 7: + packBits7(values, ptr); + break; + case 8: + packBits8(values, ptr); + break; + case 9: + packBits9(values, ptr); + break; + case 10: + packBits10(values, ptr); + break; + case 11: + packBits11(values, ptr); + break; + case 12: + packBits12(values, ptr); + break; + case 13: + packBits13(values, ptr); + break; + case 14: + packBits14(values, ptr); + break; + case 15: + packBits15(values, ptr); + break; + case 16: + packBits16(values, ptr); + break; + case 17: + packBits17(values, ptr); + break; + case 18: + packBits18(values, ptr); + break; + case 19: + packBits19(values, ptr); + break; + case 20: + packBits20(values, ptr); + break; + case 21: + packBits21(values, ptr); + break; + case 22: + packBits22(values, ptr); + break; + case 23: + packBits23(values, ptr); + break; + case 24: + packBits24(values, ptr); + break; + case 25: + packBits25(values, ptr); + break; + case 26: + packBits26(values, ptr); + break; + case 27: + packBits27(values, 
ptr); + break; + case 28: + packBits28(values, ptr); + break; + case 29: + packBits29(values, ptr); + break; + case 30: + packBits30(values, ptr); + break; + case 31: + packBits31(values, ptr); + break; + case 32: + packBits32(values, ptr); + break; + case 33: + packBits33(values, ptr); + break; + case 34: + packBits34(values, ptr); + break; + case 35: + packBits35(values, ptr); + break; + case 36: + packBits36(values, ptr); + break; + case 37: + packBits37(values, ptr); + break; + case 38: + packBits38(values, ptr); + break; + case 39: + packBits39(values, ptr); + break; + case 40: + packBits40(values, ptr); + break; + case 41: + packBits41(values, ptr); + break; + case 42: + packBits42(values, ptr); + break; + case 43: + packBits43(values, ptr); + break; + case 44: + packBits44(values, ptr); + break; + case 45: + packBits45(values, ptr); + break; + case 46: + packBits46(values, ptr); + break; + case 47: + packBits47(values, ptr); + break; + case 48: + packBits48(values, ptr); + break; + case 49: + packBits49(values, ptr); + break; + case 50: + packBits50(values, ptr); + break; + case 51: + packBits51(values, ptr); + break; + case 52: + packBits52(values, ptr); + break; + case 53: + packBits53(values, ptr); + break; + case 54: + packBits54(values, ptr); + break; + case 55: + packBits55(values, ptr); + break; + case 56: + packBits56(values, ptr); + break; + case 57: + packBits57(values, ptr); + break; + case 58: + packBits58(values, ptr); + break; + case 59: + packBits59(values, ptr); + break; + case 60: + packBits60(values, ptr); + break; + case 61: + packBits61(values, ptr); + break; + case 62: + packBits62(values, ptr); + break; + case 63: + packBits63(values, ptr); + break; + default: + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "wrong number of bits in packBitsBlock8: " + std::to_string(bits), + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +static inline void 
+unpackBitsBlock8(uint64_t* values, const uint8_t* ptr, uint8_t bits) { + switch (bits) { + case 1: + unpackBits1(values, ptr); + break; + case 2: + unpackBits2(values, ptr); + break; + case 3: + unpackBits3(values, ptr); + break; + case 4: + unpackBits4(values, ptr); + break; + case 5: + unpackBits5(values, ptr); + break; + case 6: + unpackBits6(values, ptr); + break; + case 7: + unpackBits7(values, ptr); + break; + case 8: + unpackBits8(values, ptr); + break; + case 9: + unpackBits9(values, ptr); + break; + case 10: + unpackBits10(values, ptr); + break; + case 11: + unpackBits11(values, ptr); + break; + case 12: + unpackBits12(values, ptr); + break; + case 13: + unpackBits13(values, ptr); + break; + case 14: + unpackBits14(values, ptr); + break; + case 15: + unpackBits15(values, ptr); + break; + case 16: + unpackBits16(values, ptr); + break; + case 17: + unpackBits17(values, ptr); + break; + case 18: + unpackBits18(values, ptr); + break; + case 19: + unpackBits19(values, ptr); + break; + case 20: + unpackBits20(values, ptr); + break; + case 21: + unpackBits21(values, ptr); + break; + case 22: + unpackBits22(values, ptr); + break; + case 23: + unpackBits23(values, ptr); + break; + case 24: + unpackBits24(values, ptr); + break; + case 25: + unpackBits25(values, ptr); + break; + case 26: + unpackBits26(values, ptr); + break; + case 27: + unpackBits27(values, ptr); + break; + case 28: + unpackBits28(values, ptr); + break; + case 29: + unpackBits29(values, ptr); + break; + case 30: + unpackBits30(values, ptr); + break; + case 31: + unpackBits31(values, ptr); + break; + case 32: + unpackBits32(values, ptr); + break; + case 33: + unpackBits33(values, ptr); + break; + case 34: + unpackBits34(values, ptr); + break; + case 35: + unpackBits35(values, ptr); + break; + case 36: + unpackBits36(values, ptr); + break; + case 37: + unpackBits37(values, ptr); + break; + case 38: + unpackBits38(values, ptr); + break; + case 39: + unpackBits39(values, ptr); + break; + case 40: + 
unpackBits40(values, ptr); + break; + case 41: + unpackBits41(values, ptr); + break; + case 42: + unpackBits42(values, ptr); + break; + case 43: + unpackBits43(values, ptr); + break; + case 44: + unpackBits44(values, ptr); + break; + case 45: + unpackBits45(values, ptr); + break; + case 46: + unpackBits46(values, ptr); + break; + case 47: + unpackBits47(values, ptr); + break; + case 48: + unpackBits48(values, ptr); + break; + case 49: + unpackBits49(values, ptr); + break; + case 50: + unpackBits50(values, ptr); + break; + case 51: + unpackBits51(values, ptr); + break; + case 52: + unpackBits52(values, ptr); + break; + case 53: + unpackBits53(values, ptr); + break; + case 54: + unpackBits54(values, ptr); + break; + case 55: + unpackBits55(values, ptr); + break; + case 56: + unpackBits56(values, ptr); + break; + case 57: + unpackBits57(values, ptr); + break; + case 58: + unpackBits58(values, ptr); + break; + case 59: + unpackBits59(values, ptr); + break; + case 60: + unpackBits60(values, ptr); + break; + case 61: + unpackBits61(values, ptr); + break; + case 62: + unpackBits62(values, ptr); + break; + case 63: + unpackBits63(values, ptr); + break; + default: + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "wrong number of bits in unpackBitsBlock8: " + std::to_string(bits), + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/CMakeLists.txt b/velox/external/theta/CMakeLists.txt new file mode 100644 index 00000000000..9f6182f5e2f --- /dev/null +++ b/velox/external/theta/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +velox_add_library( + velox_common_theta + CompactThetaSketchParser.cpp + ThetaUpdateSketchBase.cpp + ThetaSketch.cpp + ThetaUnionBase.cpp + ThetaUnion.cpp) + +velox_link_libraries( + velox_common_theta + PUBLIC velox_memory + PRIVATE velox_exception) + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(tests) +endif() diff --git a/velox/external/theta/CommonDefs.h b/velox/external/theta/CommonDefs.h new file mode 100644 index 00000000000..b5ba42dde07 --- /dev/null +++ b/velox/external/theta/CommonDefs.h @@ -0,0 +1,131 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace facebook::velox::common::theta { + +static const uint64_t DEFAULT_SEED = 9001; + +enum resizeFactor { X1 = 0, X2, X4, X8 }; + +template +using string = std::basic_string< + char, + std::char_traits, + typename std::allocator_traits::template rebind_alloc>; + +// common random declarations +namespace randomUtils { +static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2 +static thread_local std::mt19937_64 rand(rd()); +static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0); +static thread_local std::uniform_int_distribution next_uint64( + 0, + UINT64_MAX); + +// thread-safe random bit +static thread_local std::independent_bits_engine + random_bit(static_cast( + std::chrono::system_clock::now().time_since_epoch().count() + + std::hash{}(std::this_thread::get_id()))); + +inline void overrideSeed(uint64_t s) { + rand.seed(s); +} +} // namespace randomUtils + +// utility function to hide unused compiler warning +// usually has no additional cost +template +void unused(T&&...) {} + +// common helping functions +// TODO: find a better place for them + +constexpr uint8_t log2(uint32_t n) { + return (n > 1) ? 1 + log2(n >> 1) : 0; +} + +constexpr uint8_t lgSizeFromCount(uint32_t n, double load_factor) { + return log2(n) + + ((n > static_cast((1 << (log2(n) + 1)) * load_factor)) ? 
2 : 1); +} + +// stream helpers to hide casts +template +static inline T read(std::istream& is) { + T value; + is.read(reinterpret_cast(&value), sizeof(T)); + return value; +} + +template +static inline void read(std::istream& is, T* ptr, size_t size_bytes) { + is.read(reinterpret_cast(ptr), size_bytes); +} + +template +static inline void write(std::ostream& os, T value) { + os.write(reinterpret_cast(&value), sizeof(T)); +} + +template +static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) { + os.write(reinterpret_cast(ptr), size_bytes); +} + +template +T byteswap(T value) { + char* ptr = static_cast(static_cast(&value)); + const int len = sizeof(T); + for (size_t i = 0; i < len / 2; ++i) { + std::swap(ptr[i], ptr[len - i - 1]); + } + return value; +} + +template +static inline T readBigEndian(std::istream& is) { + T value; + is.read(reinterpret_cast(&value), sizeof(T)); + return byteswap(value); +} + +// wrapper for iterators to implement operator-> returning temporary value +template +class returnValueHolder { + public: + returnValueHolder(T value) : value_(value) {} + const T* operator->() const { + return std::addressof(value_); + } + + private: + T value_; +}; + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/CompactThetaSketchParser.cpp b/velox/external/theta/CompactThetaSketchParser.cpp new file mode 100644 index 00000000000..1925f311d71 --- /dev/null +++ b/velox/external/theta/CompactThetaSketchParser.cpp @@ -0,0 +1,261 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#ifndef COMPACT_THETA_SKETCH_PARSER_CPP +#define COMPACT_THETA_SKETCH_PARSER_CPP + +#include "CompactThetaSketchParser.h" +#include "MurmurHash3.h" +#include "ThetaHelpers.h" + +#include +#include + +namespace facebook::velox::common::theta { + +template +auto CompactThetaSketchParser::parse( + const void* ptr, + size_t size, + uint64_t seed, + bool dump_on_error) -> CompactThetaSketchData { + checkMemorySize(ptr, size, 8, dump_on_error); + checker::checkSketchType( + reinterpret_cast(ptr)[COMPACT_SKETCH_TYPE_BYTE], + COMPACT_SKETCH_TYPE); + uint8_t serial_version = + reinterpret_cast(ptr)[COMPACT_SKETCH_SERIAL_VERSION_BYTE]; + switch (serial_version) { + case 4: { + // version 4 sketches are ordered and always have entries (single item in + // exact mode is v3) + const uint16_t seed_hash = + reinterpret_cast(ptr)[COMPACT_SKETCH_SEED_HASH_U16]; + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + const bool has_theta = reinterpret_cast( + ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 1; + uint64_t theta = ThetaConstants::MAX_THETA; + if (has_theta) { + checkMemorySize(ptr, size, 16, dump_on_error); + theta = + reinterpret_cast(ptr)[COMPACT_SKETCH_V4_THETA_U64]; + } + const uint8_t num_entries_bytes = reinterpret_cast( + ptr)[COMPACT_SKETCH_V4_NUM_ENTRIES_BYTES_BYTE]; + size_t data_offset_bytes = has_theta + ? 
COMPACT_SKETCH_V4_PACKED_DATA_ESTIMATION_BYTE + : COMPACT_SKETCH_V4_PACKED_DATA_EXACT_BYTE; + checkMemorySize( + ptr, size, data_offset_bytes + num_entries_bytes, dump_on_error); + uint32_t num_entries = 0; + const uint8_t* num_entries_ptr = + reinterpret_cast(ptr) + data_offset_bytes; + for (unsigned i = 0; i < num_entries_bytes; ++i) { + num_entries |= (*num_entries_ptr++) << (i << 3); + } + data_offset_bytes += num_entries_bytes; + const uint8_t entry_bits = reinterpret_cast( + ptr)[COMPACT_SKETCH_V4_ENTRY_BITS_BYTE]; + const size_t expected_bits = entry_bits * num_entries; + const size_t expected_size_bytes = + data_offset_bytes + wholeBytesToHoldBits(expected_bits); + checkMemorySize(ptr, size, expected_size_bytes, dump_on_error); + return { + false, + true, + seed_hash, + num_entries, + theta, + reinterpret_cast(ptr) + data_offset_bytes, + entry_bits}; + } + case 3: { + uint64_t theta = ThetaConstants::MAX_THETA; + const uint16_t seed_hash = + reinterpret_cast(ptr)[COMPACT_SKETCH_SEED_HASH_U16]; + if (reinterpret_cast(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & + (1 << COMPACT_SKETCH_IS_EMPTY_FLAG)) { + return {true, true, seed_hash, 0, theta, nullptr, 64}; + } + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + const bool has_theta = reinterpret_cast( + ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 2; + if (has_theta) { + checkMemorySize( + ptr, + size, + (COMPACT_SKETCH_THETA_U64 + 1) * sizeof(uint64_t), + dump_on_error); + theta = + reinterpret_cast(ptr)[COMPACT_SKETCH_THETA_U64]; + } + if (reinterpret_cast( + ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] == 1) { + checkMemorySize(ptr, size, 16, dump_on_error); + return { + false, + true, + seed_hash, + 1, + theta, + reinterpret_cast(ptr) + + COMPACT_SKETCH_SINGLE_ENTRY_U64, + 64}; + } + const uint32_t num_entries = reinterpret_cast( + ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32]; + const size_t entries_start_u64 = has_theta + ? 
COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 + : COMPACT_SKETCH_ENTRIES_EXACT_U64; + const uint64_t* entries = + reinterpret_cast(ptr) + entries_start_u64; + const size_t expected_size_bytes = + (entries_start_u64 + num_entries) * sizeof(uint64_t); + checkMemorySize(ptr, size, expected_size_bytes, dump_on_error); + const bool is_ordered = + reinterpret_cast(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & + (1 << COMPACT_SKETCH_IS_ORDERED_FLAG); + return {false, is_ordered, seed_hash, num_entries, theta, entries, 64}; + } + case 1: { + uint16_t seed_hash = compute_seed_hash(seed); + const uint32_t num_entries = reinterpret_cast( + ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32]; + uint64_t theta = + reinterpret_cast(ptr)[COMPACT_SKETCH_THETA_U64]; + bool is_empty = + (num_entries == 0) && (theta == ThetaConstants::MAX_THETA); + if (is_empty) + return {true, true, seed_hash, 0, theta, nullptr, 64}; + const uint64_t* entries = reinterpret_cast(ptr) + + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64; + const size_t expected_size_bytes = + (COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 + num_entries) * + sizeof(uint64_t); + checkMemorySize(ptr, size, expected_size_bytes, dump_on_error); + return {false, true, seed_hash, num_entries, theta, entries, 64}; + } + case 2: { + uint8_t preamble_size = + reinterpret_cast(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE]; + const uint16_t seed_hash = + reinterpret_cast(ptr)[COMPACT_SKETCH_SEED_HASH_U16]; + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + if (preamble_size == 1) { + return { + true, true, seed_hash, 0, ThetaConstants::MAX_THETA, nullptr, 64}; + } else if (preamble_size == 2) { + const uint32_t num_entries = reinterpret_cast( + ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32]; + if (num_entries == 0) { + return { + true, true, seed_hash, 0, ThetaConstants::MAX_THETA, nullptr, 64}; + } else { + const size_t expected_size_bytes = (preamble_size + num_entries) << 3; + checkMemorySize(ptr, size, expected_size_bytes, dump_on_error); + const uint64_t* entries = 
reinterpret_cast(ptr) + + COMPACT_SKETCH_ENTRIES_EXACT_U64; + return { + false, + true, + seed_hash, + num_entries, + ThetaConstants::MAX_THETA, + entries, + 64}; + } + } else if (preamble_size == 3) { + const uint32_t num_entries = reinterpret_cast( + ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32]; + uint64_t theta = + reinterpret_cast(ptr)[COMPACT_SKETCH_THETA_U64]; + bool is_empty = + (num_entries == 0) && (theta == ThetaConstants::MAX_THETA); + if (is_empty) + return {true, true, seed_hash, 0, theta, nullptr, 64}; + const uint64_t* entries = reinterpret_cast(ptr) + + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64; + const size_t expected_size_bytes = + (COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 + num_entries) * + sizeof(uint64_t); + checkMemorySize(ptr, size, expected_size_bytes, dump_on_error); + return {false, true, seed_hash, num_entries, theta, entries, 64}; + } else { + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + std::to_string(preamble_size) + " longs of preamble, but expected 1, 2, or 3", + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } + } + default: + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "unsupported serial version " + std::to_string(serial_version), + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +template +void CompactThetaSketchParser::checkMemorySize( + const void* ptr, + size_t actual_bytes, + size_t expected_bytes, + bool dump_on_error) { + if (actual_bytes < expected_bytes) { + auto msg = "at least " + std::to_string(expected_bytes) + + " bytes expected, actual " + std::to_string(actual_bytes) + + (dump_on_error + ?
(", sketch dump: " + + hexDump(reinterpret_cast(ptr), actual_bytes)) + : ""); + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + msg, + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +template +std::string CompactThetaSketchParser::hexDump( + const uint8_t* ptr, + size_t size) { + std::stringstream s; + s << std::hex << std::setfill('0') << std::uppercase; + for (size_t i = 0; i < size; ++i) + s << std::setw(2) << (ptr[i] & 0xff); + return s.str(); +} + +} // namespace facebook::velox::common::theta + +#endif diff --git a/velox/external/theta/CompactThetaSketchParser.h b/velox/external/theta/CompactThetaSketchParser.h new file mode 100644 index 00000000000..64867ed61c5 --- /dev/null +++ b/velox/external/theta/CompactThetaSketchParser.h @@ -0,0 +1,82 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches +#pragma once + +#include + +namespace facebook::velox::common::theta { + +template +T wholeBytesToHoldBits(T bits) { + static_assert(std::is_integral::value, "integral type expected"); + return (bits >> 3) + ((bits & 7) > 0); +} + +template +class CompactThetaSketchParser { + public: + struct CompactThetaSketchData { + bool isEmpty; + bool isOrdered; + uint16_t seedHash; + uint32_t numEntries; + uint64_t theta; + const void* entriesStartPtr; + uint8_t entryBits; + }; + + static CompactThetaSketchData parse( + const void* ptr, + size_t size, + uint64_t seed, + bool dump_on_error = false); + + private: + // offsets are in sizeof(type) + static const size_t COMPACT_SKETCH_PRE_LONGS_BYTE = 0; + static const size_t COMPACT_SKETCH_SERIAL_VERSION_BYTE = 1; + static const size_t COMPACT_SKETCH_TYPE_BYTE = 2; + static const size_t COMPACT_SKETCH_FLAGS_BYTE = 5; + static const size_t COMPACT_SKETCH_SEED_HASH_U16 = 3; + static const size_t COMPACT_SKETCH_SINGLE_ENTRY_U64 = 1; // ver 3 + static const size_t COMPACT_SKETCH_NUM_ENTRIES_U32 = 2; // ver 1-3 + static const size_t COMPACT_SKETCH_ENTRIES_EXACT_U64 = 2; // ver 1-3 + static const size_t COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 = 3; // ver 1-3 + static const size_t COMPACT_SKETCH_THETA_U64 = 2; // ver 1-3 + static const size_t COMPACT_SKETCH_V4_ENTRY_BITS_BYTE = 3; + static const size_t COMPACT_SKETCH_V4_NUM_ENTRIES_BYTES_BYTE = 4; + static const size_t COMPACT_SKETCH_V4_THETA_U64 = 1; + static const size_t COMPACT_SKETCH_V4_PACKED_DATA_EXACT_BYTE = 8; + static const size_t COMPACT_SKETCH_V4_PACKED_DATA_ESTIMATION_BYTE = 16; + + static const uint8_t COMPACT_SKETCH_IS_EMPTY_FLAG = 2; + static const uint8_t COMPACT_SKETCH_IS_ORDERED_FLAG = 4; + + static const uint8_t COMPACT_SKETCH_TYPE = 3; + + static void checkMemorySize( + const void* ptr, + size_t actual_bytes, + size_t expected_bytes, + bool dump_on_error); + static std::string hexDump(const uint8_t* ptr, size_t size); +}; + 
+} // namespace facebook::velox::common::theta + +#include "CompactThetaSketchParser.cpp" diff --git a/velox/external/theta/ConditionalForward.h b/velox/external/theta/ConditionalForward.h new file mode 100644 index 00000000000..f90e920fddb --- /dev/null +++ b/velox/external/theta/ConditionalForward.h @@ -0,0 +1,81 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include + +namespace facebook::velox::common::theta { + +// Forward type T2 as rvalue reference if type T1 is rvalue reference + +template +using fwdType = typename std::conditional< + std::is_lvalue_reference::value, + T2, + typename std::remove_reference::type&&>::type; + +template +fwdType conditionalForward(T2&& value) { + return std::forward>(std::forward(value)); +} + +// Forward container as iterators + +template +auto forwardBegin(Container&& c) -> typename std::enable_if< + std::is_lvalue_reference::value || + std::is_same< + typename std::remove_reference::type::const_iterator, + decltype(c.begin())>::value, + decltype(c.begin())>::type { + return c.begin(); +} + +template +auto forwardBegin(Container&& c) -> typename std::enable_if< + !std::is_lvalue_reference::value && + !std::is_same< + typename std::remove_reference::type::const_iterator, + decltype(c.begin())>::value, + decltype(std::make_move_iterator(c.begin()))>::type { + return 
std::make_move_iterator(c.begin()); +} + +template +auto forwardEnd(Container&& c) -> typename std::enable_if< + std::is_lvalue_reference::value || + std::is_same< + typename std::remove_reference::type::const_iterator, + decltype(c.begin())>::value, + decltype(c.end())>::type { + return c.end(); +} + +template +auto forwardEnd(Container&& c) -> typename std::enable_if< + !std::is_lvalue_reference::value && + !std::is_same< + typename std::remove_reference::type::const_iterator, + decltype(c.begin())>::value, + decltype(std::make_move_iterator(c.end()))>::type { + return std::make_move_iterator(c.end()); +} + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/CountZeros.h b/velox/external/theta/CountZeros.h new file mode 100644 index 00000000000..c898b16a52d --- /dev/null +++ b/velox/external/theta/CountZeros.h @@ -0,0 +1,109 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include + +namespace facebook::velox::common::theta { + +static const uint8_t byteLeadingZerosTable[256] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +static const uint8_t byteTrailingZerosTable[256] = { + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, + 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0}; + +static const uint64_t FCLZ_MASK_56 = 0x00ffffffffffffff; +static const uint64_t FCLZ_MASK_48 = 0x0000ffffffffffff; +static const uint64_t FCLZ_MASK_40 = 0x000000ffffffffff; +static 
const uint64_t FCLZ_MASK_32 = 0x00000000ffffffff; +static const uint64_t FCLZ_MASK_24 = 0x0000000000ffffff; +static const uint64_t FCLZ_MASK_16 = 0x000000000000ffff; +static const uint64_t FCLZ_MASK_08 = 0x00000000000000ff; + +static inline uint8_t countLeadingZerosInU64(uint64_t input) { + if (input > FCLZ_MASK_56) + return byteLeadingZerosTable[(input >> 56) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_48) + return 8 + byteLeadingZerosTable[(input >> 48) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_40) + return 16 + byteLeadingZerosTable[(input >> 40) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_32) + return 24 + byteLeadingZerosTable[(input >> 32) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_24) + return 32 + byteLeadingZerosTable[(input >> 24) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_16) + return 40 + byteLeadingZerosTable[(input >> 16) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_08) + return 48 + byteLeadingZerosTable[(input >> 8) & FCLZ_MASK_08]; + if (true) + return 56 + byteLeadingZerosTable[(input)&FCLZ_MASK_08]; +} + +static inline uint8_t countLeadingZerosInU32(uint32_t input) { + if (input > FCLZ_MASK_24) + return byteLeadingZerosTable[(input >> 24) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_16) + return 8 + byteLeadingZerosTable[(input >> 16) & FCLZ_MASK_08]; + if (input > FCLZ_MASK_08) + return 16 + byteLeadingZerosTable[(input >> 8) & FCLZ_MASK_08]; + if (true) + return 24 + byteLeadingZerosTable[(input)&FCLZ_MASK_08]; +} + +static inline uint8_t countTrailingZerosInU32(uint32_t input) { + for (int i = 0; i < 4; i++) { + const int byte = input & 0xff; + if (byte != 0) + return static_cast((i << 3) + byteTrailingZerosTable[byte]); + input >>= 8; + } + return 32; +} + +static inline uint8_t countTrailingZerosInU64(uint64_t input) { + for (int i = 0; i < 8; i++) { + const int byte = input & 0xff; + if (byte != 0) + return static_cast((i << 3) + byteTrailingZerosTable[byte]); + input >>= 8; + } + return 64; +} + +} // namespace facebook::velox::common::theta diff --git
a/velox/external/theta/MemoryOperations.h b/velox/external/theta/MemoryOperations.h new file mode 100644 index 00000000000..81c2e972b3f --- /dev/null +++ b/velox/external/theta/MemoryOperations.h @@ -0,0 +1,90 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "velox/common/base/Exceptions.h" +#include +#include +#include +#include +#include + +namespace facebook::velox::common::theta { + +static inline void ensureMinimumMemory( + size_t bytes_available, + size_t min_needed) { + if (bytes_available < min_needed) { + auto msg = "Insufficient buffer size detected: bytes available " + + std::to_string(bytes_available) + ", minimum needed " + + std::to_string(min_needed); + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + msg, + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } +} + +static inline void checkMemorySize(size_t requested_index, size_t capacity) { + if (requested_index > capacity) { + auto msg = "Attempt to access memory beyond limits: requested index " + + std::to_string(requested_index) + ", capacity " + + std::to_string(capacity); + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + msg, + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } +} + +// note: size is in bytes, not items +static inline size_t 
copyFromMem(const void* src, void* dst, size_t size) { + memcpy(dst, src, size); + return size; +} + +// note: size is in bytes, not items +static inline size_t copyToMem(const void* src, void* dst, size_t size) { + memcpy(dst, src, size); + return size; +} + +template +static inline size_t copyFromMem(const void* src, T& item) { + memcpy(&item, src, sizeof(T)); + return sizeof(T); +} + +template +static inline size_t copyToMem(T item, void* dst) { + memcpy(dst, &item, sizeof(T)); + return sizeof(T); +} + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/MurmurHash3.h b/velox/external/theta/MurmurHash3.h new file mode 100644 index 00000000000..bc5dd67fd76 --- /dev/null +++ b/velox/external/theta/MurmurHash3.h @@ -0,0 +1,221 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +// Minimally modified from Austin Appleby's code: +// * Removed MurmurHash3_x86_32 and MurmurHash3_x86_128 +// * Changed input seed in MurmurHash3_x64_128 to uint64_t +// * Define and use HashState reference to return result +// * Made entire hash function defined inline +// * Added compute_seed_hash +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code.
+ +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#pragma once + +#include + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +#define MURMUR3_FORCE_INLINE __forceinline + +#include + +#define MURMUR3_ROTL64(x, y) _rotl64(x, y) + +#define MURMUR3_BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline)) + +inline uint64_t rotl64(uint64_t x, int8_t r) { + return (x << r) | (x >> (64 - r)); +} + +#define MURMUR3_ROTL64(x, y) rotl64(x, y) + +#define MURMUR3_BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// Return type - Using C++ reference for return type which should allow better +// compiler optimization than a void* pointer +typedef struct { + uint64_t h1; + uint64_t h2; +} HashState; + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +MURMUR3_FORCE_INLINE uint64_t getblock64(const uint8_t* p, size_t i) { + uint64_t res; + memcpy(&res, p + i * sizeof(uint64_t), sizeof(res)); + return res; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +MURMUR3_FORCE_INLINE uint64_t 
fmix64(uint64_t k) { + k ^= k >> 33; + k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +MURMUR3_FORCE_INLINE void MurmurHash3_x64_128( + const void* key, + size_t lenBytes, + uint64_t seed, + HashState& out) { + static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5); + static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f); + + const uint8_t* data = (const uint8_t*)key; + + out.h1 = seed; + out.h2 = seed; + + // Number of full 128-bit blocks of 16 bytes. + // Possible exclusion of a remainder of up to 15 bytes. + const size_t nblocks = lenBytes >> 4; // bytes / 16 + + // Process the 128-bit blocks (the body) into the hash + for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block + uint64_t k1 = getblock64(data, i * 2 + 0); + uint64_t k2 = getblock64(data, i * 2 + 1); + + k1 *= c1; + k1 = MURMUR3_ROTL64(k1, 31); + k1 *= c2; + out.h1 ^= k1; + out.h1 = MURMUR3_ROTL64(out.h1, 27); + out.h1 += out.h2; + out.h1 = out.h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = MURMUR3_ROTL64(k2, 33); + k2 *= c1; + out.h2 ^= k2; + out.h2 = MURMUR3_ROTL64(out.h2, 31); + out.h2 += out.h1; + out.h2 = out.h2 * 5 + 0x38495ab5; + } + + // tail + const uint8_t* tail = (const uint8_t*)(data + (nblocks << 4)); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (lenBytes & 15) { + case 15: + k2 ^= ((uint64_t)tail[14]) << 48; // falls through + case 14: + k2 ^= ((uint64_t)tail[13]) << 40; // falls through + case 13: + k2 ^= ((uint64_t)tail[12]) << 32; // falls through + case 12: + k2 ^= ((uint64_t)tail[11]) << 24; // falls through + case 11: + k2 ^= ((uint64_t)tail[10]) << 16; // falls through + case 10: + k2 ^= ((uint64_t)tail[9]) << 8; // falls through + case 9: + k2 ^= ((uint64_t)tail[8]) << 0; + k2 *= c2; + k2 = MURMUR3_ROTL64(k2, 33); + k2 *= c1; + out.h2 ^= k2; + // falls through + case 8: + k1 ^= ((uint64_t)tail[7]) << 56; // falls through + case 7: + k1 ^= 
((uint64_t)tail[6]) << 48; // falls through + case 6: + k1 ^= ((uint64_t)tail[5]) << 40; // falls through + case 5: + k1 ^= ((uint64_t)tail[4]) << 32; // falls through + case 4: + k1 ^= ((uint64_t)tail[3]) << 24; // falls through + case 3: + k1 ^= ((uint64_t)tail[2]) << 16; // falls through + case 2: + k1 ^= ((uint64_t)tail[1]) << 8; // falls through + case 1: + k1 ^= ((uint64_t)tail[0]) << 0; + k1 *= c1; + k1 = MURMUR3_ROTL64(k1, 31); + k1 *= c2; + out.h1 ^= k1; + }; + + //---------- + // finalization + + out.h1 ^= lenBytes; + out.h2 ^= lenBytes; + + out.h1 += out.h2; + out.h2 += out.h1; + + out.h1 = fmix64(out.h1); + out.h2 = fmix64(out.h2); + + out.h1 += out.h2; + out.h2 += out.h1; +} + +//----------------------------------------------------------------------------- + +MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) { + HashState hashes; + MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes); + return static_cast(hashes.h1 & 0xffff); +} + +#undef MURMUR3_FORCE_INLINE +#undef MURMUR3_ROTL64 +#undef MURMUR3_BIG_CONSTANT diff --git a/velox/external/theta/Serde.h b/velox/external/theta/Serde.h new file mode 100644 index 00000000000..3215e62a866 --- /dev/null +++ b/velox/external/theta/Serde.h @@ -0,0 +1,297 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include +#include +#include +#include + +#include "MemoryOperations.h" +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::common::theta { + +/// Interface for serializing and deserializing items +template +struct serde { + /** + * Stream serialization + * @param os output stream + * @param items pointer to array of items + * @param num number of items + */ + void serialize(std::ostream& os, const T* items, unsigned num) const; + + /** + * Stream deserialization + * @param is input stream + * @param items pointer to array of items (items in the array are allocated + * but not initialized) + * @param num number of items + */ + void deserialize(std::istream& is, T* items, unsigned num) const; + + /** + * Raw bytes serialization + * @param ptr pointer to output buffer + * @param capacity size of the buffer in bytes + * @param items pointer to array of items + * @param num number of items + */ + size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) + const; + + /** + * Raw bytes deserialization + * @param ptr pointer to input buffer + * @param capacity size of the buffer in bytes + * @param items pointer to array of items (items in the array are allocated + * but not initialized) + * @param num number of items + */ + size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) + const; + + /** + * Size of the given item + * @param item to be sized + * @return size of the given item in bytes + */ + size_t size_of_item(const T& item) const; +}; + +/// serde for all fixed-size arithmetic types (int and float of different +/// sizes). 
in particular, kll_sketch should produce sketches +/// binary-compatible with LongsSketch and ItemsSketch with +/// ArrayOfLongsSerDe in Java +template +struct serde::value>::type> { + /// @copydoc serde::serialize + void serialize(std::ostream& os, const T* items, unsigned num) const { + bool failure = false; + try { + os.write(reinterpret_cast(items), sizeof(T) * num); + } catch (std::ostream::failure&) { + failure = true; + } + if (failure || !os.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error writing to std::ostream with " + std::to_string(num) + + " items", + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } + } + + void deserialize(std::istream& is, T* items, unsigned num) const { + bool failure = false; + try { + is.read((char*)items, sizeof(T) * num); + } catch (std::istream::failure&) { + failure = true; + } + if (failure || !is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream with " + std::to_string(num) + + " items", + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } + } + + /// @copydoc serde::serialize(void*,size_t,const T*,unsigned) const + size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) + const { + const size_t bytes_written = sizeof(T) * num; + checkMemorySize(bytes_written, capacity); + memcpy(ptr, items, bytes_written); + return bytes_written; + } + + /// @copydoc serde::deserialize(const void*,size_t,T*,unsigned) const + size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) + const { + const size_t bytes_read = sizeof(T) * num; + checkMemorySize(bytes_read, capacity); + memcpy(items, ptr, bytes_read); + return bytes_read; + } + + /// @copydoc serde::size_of_item + size_t size_of_item(const T& item) const { + unused(item); + return sizeof(T); + } +}; + +/// serde for std::string items. 
+/// This should produce sketches binary-compatible with +/// ItemsSketch with ArrayOfStringsSerDe in Java. +/// The length of each string is stored as a 32-bit integer (historically), +/// which may be too wasteful. Treat this as an example. +template <> +struct serde { + /// @copydoc serde::serialize + void serialize(std::ostream& os, const std::string* items, unsigned num) + const { + unsigned i = 0; + bool failure = false; + try { + for (; i < num && os.good(); i++) { + uint32_t length = static_cast(items[i].size()); + os.write((char*)&length, sizeof(length)); + os.write(items[i].c_str(), length); + } + } catch (std::ostream::failure&) { + failure = true; + } + if (failure || !os.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error writing to std::ostream at item " + std::to_string(i), + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } + } + + /// @copydoc serde::deserialize + void deserialize(std::istream& is, std::string* items, unsigned num) const { + unsigned i = 0; + bool failure = false; + try { + for (; i < num; i++) { + uint32_t length; + is.read((char*)&length, sizeof(length)); + if (!is.good()) { + break; + } + std::string str; + str.reserve(length); + for (uint32_t j = 0; j < length; j++) { + str.push_back(static_cast(is.get())); + } + if (!is.good()) { + break; + } + new (&items[i]) std::string(std::move(str)); + } + } catch (std::istream::failure&) { + failure = true; + } + if (failure || !is.good()) { + // clean up what we've already allocated + for (unsigned j = 0; j < i; ++j) { + items[j].~basic_string(); + } + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream at item " + std::to_string(i), + error_source::kErrorSourceUser, + error_code::kGenericUserError, + false /*retriable*/); + } + } + + /// @copydoc serde::serialize(void*,size_t,const T*,unsigned) const + size_t serialize( + void* ptr, + size_t capacity, 
+ const std::string* items, + unsigned num) const { + size_t bytes_written = 0; + for (unsigned i = 0; i < num; ++i) { + const uint32_t length = static_cast(items[i].size()); + const size_t new_bytes = length + sizeof(length); + checkMemorySize(bytes_written + new_bytes, capacity); + memcpy(ptr, &length, sizeof(length)); + ptr = static_cast(ptr) + sizeof(uint32_t); + memcpy(ptr, items[i].c_str(), length); + ptr = static_cast(ptr) + length; + bytes_written += new_bytes; + } + return bytes_written; + } + + /// @copydoc serde::deserialize(const void*,size_t,T*,unsigned) const + size_t deserialize( + const void* ptr, + size_t capacity, + std::string* items, + unsigned num) const { + size_t bytes_read = 0; + unsigned i = 0; + bool failure = false; + for (; i < num && !failure; ++i) { + uint32_t length; + if (bytes_read + sizeof(length) > capacity) { + bytes_read += sizeof(length); // we'll use this to report the error + failure = true; + break; + } + memcpy(&length, ptr, sizeof(length)); + ptr = static_cast(ptr) + sizeof(uint32_t); + bytes_read += sizeof(length); + + if (bytes_read + length > capacity) { + bytes_read += length; // we'll use this to report the error + failure = true; + break; + } + new (&items[i]) std::string(static_cast(ptr), length); + ptr = static_cast(ptr) + length; + bytes_read += length; + } + + if (failure) { + // clean up what we've already allocated + for (unsigned j = 0; j < i; ++j) + items[j].~basic_string(); + // using this for a consistent error message + checkMemorySize(bytes_read, capacity); + } + + return bytes_read; + } + + /// @copydoc serde::size_of_item + size_t size_of_item(const std::string& item) const { + return sizeof(uint32_t) + item.size(); + } +}; + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/ThetaComparators.h b/velox/external/theta/ThetaComparators.h new file mode 100644 index 00000000000..e35eb96578c --- /dev/null +++ b/velox/external/theta/ThetaComparators.h @@ -0,0 +1,48 @@ +/* +* 
Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include + +namespace facebook::velox::common::theta { + +template +struct compareByKey { + template + bool operator()(Entry1&& a, Entry2&& b) const { + return ExtractKey()(std::forward(a)) < + ExtractKey()(std::forward(b)); + } +}; + +// less than + +template +class KeyLessThan { + public: + explicit KeyLessThan(const Key& key) : key(key) {} + bool operator()(const Entry& entry) const { + return ExtractKey()(entry) < this->key; + } + + private: + Key key; +}; + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/ThetaConstants.h b/velox/external/theta/ThetaConstants.h new file mode 100644 index 00000000000..e59a2b6c61f --- /dev/null +++ b/velox/external/theta/ThetaConstants.h @@ -0,0 +1,43 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include "CommonDefs.h" + +namespace facebook::velox::common::theta { + +/// Theta constants +namespace ThetaConstants { +/// hash table resize factor +using resizeFactor = facebook::velox::common::theta::resizeFactor; +/// default resize factor +const resizeFactor DEFAULT_RESIZE_FACTOR = resizeFactor::X8; + +/// max theta - signed max for compatibility with Java +const uint64_t MAX_THETA = LLONG_MAX; +/// min log2 of K +const uint8_t MIN_LG_K = 5; +/// max log2 of K +const uint8_t MAX_LG_K = 26; +/// default log2 of K +const uint8_t DEFAULT_LG_K = 12; +} // namespace ThetaConstants + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/ThetaHelpers.h b/velox/external/theta/ThetaHelpers.h new file mode 100644 index 00000000000..27c32f8cb10 --- /dev/null +++ b/velox/external/theta/ThetaHelpers.h @@ -0,0 +1,83 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include + +#include "ThetaConstants.h" +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::common::theta { + +template +static void checkValue(T actual, T expected, const char* description) { + if (actual != expected) { + auto msg = std::string(description) + " mismatch: expected " + + std::to_string(expected) + ", actual " + std::to_string(actual); + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + msg, + error_source::kErrorSourceUser, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +template +class checker { + public: + static void checkSerialVersion(uint8_t actual, uint8_t expected) { + checkValue(actual, expected, "serial version"); + } + static void checkSketchFamily(uint8_t actual, uint8_t expected) { + checkValue(actual, expected, "sketch family"); + } + static void checkSketchType(uint8_t actual, uint8_t expected) { + checkValue(actual, expected, "sketch type"); + } + static void checkSeedHash(uint16_t actual, uint16_t expected) { + checkValue(actual, expected, "seed hash"); + } +}; + +template +class ThetaBuildHelper { + public: + // consistent way of initializing theta from p + // avoids multiplication if p == 1 since it might not yield MAX_THETA exactly + static uint64_t startingThetaFromP(float p) { + if (p < 1) + return static_cast( + static_cast(ThetaConstants::MAX_THETA) * p); + return ThetaConstants::MAX_THETA; + } + + static uint8_t + startingSubMultiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) { + return (lg_tgt <= lg_min) ? lg_min + : (lg_rf == 0) ? lg_tgt + : ((lg_tgt - lg_min) % lg_rf) + lg_min; + } +}; + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/ThetaSketch.cpp b/velox/external/theta/ThetaSketch.cpp new file mode 100644 index 00000000000..e72929268f7 --- /dev/null +++ b/velox/external/theta/ThetaSketch.cpp @@ -0,0 +1,1122 @@ +/* +* Copyright (c) Facebook, Inc. 
and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#ifndef THETA_SKETCH_CPP +#define THETA_SKETCH_CPP + +#include +#include + +#include "BinomialBounds.h" +#include "BitPacking.h" +#include "CompactThetaSketchParser.h" +#include "CountZeros.h" +#include "MemoryOperations.h" +#include "ThetaSketch.h" +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::common::theta { + +template +bool BaseThetaSketchAlloc::isEstimationMode() const { + return getTheta64() < ThetaConstants::MAX_THETA && !isEmpty(); +} + +template +double BaseThetaSketchAlloc::getTheta() const { + return static_cast(getTheta64()) / + static_cast(ThetaConstants::MAX_THETA); +} + +template +double BaseThetaSketchAlloc::getEstimate() const { + return getNumRetained() / getTheta(); +} + +template +double BaseThetaSketchAlloc::getLowerBound(uint8_t num_std_devs) const { + if (!isEstimationMode()) + return getNumRetained(); + return BinomialBounds::getLowerBound( + getNumRetained(), getTheta(), num_std_devs); +} + +template +double BaseThetaSketchAlloc::getUpperBound(uint8_t num_std_devs) const { + if (!isEstimationMode()) + return getNumRetained(); + return BinomialBounds::getUpperBound( + getNumRetained(), getTheta(), num_std_devs); +} + +template +string BaseThetaSketchAlloc::toString(bool print_details) const { + // Using a temporary stream for implementation here does not comply with + // AllocatorAwareContainer 
requirements. The stream does not support passing + // an allocator instance, and alternatives are complicated. + std::ostringstream os; + os << "### Theta sketch summary:" << std::endl; + os << " num retained entries : " << this->getNumRetained() << std::endl; + os << " seed hash : " << this->getSeedHash() << std::endl; + os << " empty? : " << (this->isEmpty() ? "true" : "false") + << std::endl; + os << " ordered? : " << (this->isOrdered() ? "true" : "false") + << std::endl; + os << " estimation mode? : " + << (this->isEstimationMode() ? "true" : "false") << std::endl; + os << " theta (fraction) : " << this->getTheta() << std::endl; + os << " theta (raw 64-bit) : " << this->getTheta64() << std::endl; + os << " estimate : " << this->getEstimate() << std::endl; + os << " lower bound 95% conf : " << this->getLowerBound(2) << std::endl; + os << " upper bound 95% conf : " << this->getUpperBound(2) << std::endl; + printSpecifics(os); + os << "### End sketch summary" << std::endl; + if (print_details) { + printItems(os); + } + return string(os.str().c_str(), this->getAllocator()); +} + +template +void ThetaSketchAlloc::printItems(std::ostringstream& os) const { + os << "### Retained entries" << std::endl; + for (const auto& hash : *this) { + os << hash << std::endl; + } + os << "### End retained entries" << std::endl; +} + +// update sketch + +template +UpdateThetaSketchAlloc::UpdateThetaSketchAlloc( + uint8_t lgCurSize, + uint8_t lgNomSize, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const A& allocator) + : table_(lgCurSize, lgNomSize, rf, p, theta, seed, allocator) {} + +template +A UpdateThetaSketchAlloc::getAllocator() const { + return table_.allocator_; +} + +template +bool UpdateThetaSketchAlloc::isEmpty() const { + return table_.isEmpty_; +} + +template +bool UpdateThetaSketchAlloc::isOrdered() const { + return table_.numEntries_ > 1 ? false : true; +} + +template +uint64_t UpdateThetaSketchAlloc::getTheta64() const { + return isEmpty() ? 
ThetaConstants::MAX_THETA : table_.theta_; +} + +template +uint32_t UpdateThetaSketchAlloc::getNumRetained() const { + return table_.numEntries_; +} + +template +uint16_t UpdateThetaSketchAlloc::getSeedHash() const { + return compute_seed_hash(table_.seed_); +} + +template +uint8_t UpdateThetaSketchAlloc::getLgK() const { + return table_.lgNomSize_; +} + +template +auto UpdateThetaSketchAlloc::getRf() const -> resizeFactor { + return table_.rf_; +} + +template +void UpdateThetaSketchAlloc::update(uint64_t value) { + update(&value, sizeof(value)); +} + +template +void UpdateThetaSketchAlloc::update(int64_t value) { + update(&value, sizeof(value)); +} + +template +void UpdateThetaSketchAlloc::update(uint32_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(int32_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(uint16_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(int16_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(uint8_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(int8_t value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(double value) { + update(canonical_double(value)); +} + +template +void UpdateThetaSketchAlloc::update(float value) { + update(static_cast(value)); +} + +template +void UpdateThetaSketchAlloc::update(const std::string& value) { + if (value.empty()) + return; + update(value.c_str(), value.length()); +} + +template +void UpdateThetaSketchAlloc::update(const void* data, size_t length) { + const uint64_t hash = table_.hashAndScreen(data, length); + if (hash == 0) + return; + auto result = table_.find(hash); + if (!result.second) { + table_.insert(result.first, hash); + } +} + +template +void UpdateThetaSketchAlloc::trim() { + table_.trim(); +} + +template +void UpdateThetaSketchAlloc::reset() 
{ + table_.reset(); +} + +template +auto UpdateThetaSketchAlloc::begin() -> iterator { + return iterator(table_.entries_, 1 << table_.lgCurSize_, 0); +} + +template +auto UpdateThetaSketchAlloc::end() -> iterator { + return iterator(nullptr, 0, 1 << table_.lgCurSize_); +} + +template +auto UpdateThetaSketchAlloc::begin() const -> const_iterator { + return const_iterator(table_.entries_, 1 << table_.lgCurSize_, 0); +} + +template +auto UpdateThetaSketchAlloc::end() const -> const_iterator { + return const_iterator(nullptr, 0, 1 << table_.lgCurSize_); +} + +template +CompactThetaSketchAlloc UpdateThetaSketchAlloc::compact( + bool ordered) const { + return CompactThetaSketchAlloc(*this, ordered); +} + +template +void UpdateThetaSketchAlloc::printSpecifics(std::ostringstream& os) const { + os << " lg nominal size : " << static_cast(table_.lgNomSize_) + << std::endl; + os << " lg current size : " << static_cast(table_.lgCurSize_) + << std::endl; + os << " resize factor : " << (1 << table_.rf_) << std::endl; +} + +// builder + +template +UpdateThetaSketchAlloc::builder::builder(const A& allocator) + : ThetaBaseBuilder(allocator) {} + +template +UpdateThetaSketchAlloc UpdateThetaSketchAlloc::builder::build() const { + return UpdateThetaSketchAlloc( + this->startingLgSize(), + this->lg_k_, + this->rf_, + this->p_, + this->startingTheta(), + this->seed_, + this->allocator_); +} + +// compact sketch + +template +template +CompactThetaSketchAlloc::CompactThetaSketchAlloc( + const Other& other, + bool ordered) + : isEmpty_(other.isEmpty()), + isOrdered_(other.isOrdered() || ordered), + seedHash_(other.getSeedHash()), + theta_(other.getTheta64()), + entries_(other.getAllocator()) { + if (!other.isEmpty()) { + entries_.reserve(other.getNumRetained()); + std::copy(other.begin(), other.end(), std::back_inserter(entries_)); + if (ordered && !other.isOrdered()) + std::sort(entries_.begin(), entries_.end()); + } +} + +template +CompactThetaSketchAlloc::CompactThetaSketchAlloc( + bool 
isEmpty, + bool isOrdered, + uint16_t seedHash, + uint64_t theta, + std::vector&& entries) + : isEmpty_(isEmpty), + isOrdered_(isOrdered || (entries.size() <= 1ULL)), + seedHash_(seedHash), + theta_(theta), + entries_(std::move(entries)) {} + +template +A CompactThetaSketchAlloc::getAllocator() const { + return entries_.get_allocator(); +} + +template +bool CompactThetaSketchAlloc::isEmpty() const { + return isEmpty_; +} + +template +bool CompactThetaSketchAlloc::isOrdered() const { + return isOrdered_; +} + +template +uint64_t CompactThetaSketchAlloc::getTheta64() const { + return theta_; +} + +template +uint32_t CompactThetaSketchAlloc::getNumRetained() const { + return static_cast(entries_.size()); +} + +template +uint16_t CompactThetaSketchAlloc::getSeedHash() const { + return seedHash_; +} + +template +auto CompactThetaSketchAlloc::begin() -> iterator { + return iterator(entries_.data(), static_cast(entries_.size()), 0); +} + +template +auto CompactThetaSketchAlloc::end() -> iterator { + return iterator(nullptr, 0, static_cast(entries_.size())); +} + +template +auto CompactThetaSketchAlloc::begin() const -> const_iterator { + return const_iterator( + entries_.data(), static_cast(entries_.size()), 0); +} + +template +auto CompactThetaSketchAlloc::end() const -> const_iterator { + return const_iterator(nullptr, 0, static_cast(entries_.size())); +} + +template +void CompactThetaSketchAlloc::printSpecifics(std::ostringstream&) const {} + +template +uint8_t CompactThetaSketchAlloc::getPreambleLongs(bool compressed) const { + if (compressed) { + return this->isEstimationMode() ? 2 : 1; + } + return this->isEstimationMode() ? 3 + : this->isEmpty() || entries_.size() == 1 ? 
1 + : 2; +} + +template +size_t CompactThetaSketchAlloc::getMaxSerializedSizeBytes(uint8_t lg_k) { + return sizeof(uint64_t) * + (3 + UpdateThetaSketchAlloc::ThetaTable::getCapacity(lg_k + 1, lg_k)); +} + +template +size_t CompactThetaSketchAlloc::getSerializedSizeBytes( + bool compressed) const { + if (compressed && isSuitableForCompression()) { + return getCompressedSerializedSizeBytes( + computeEntryBits(), getNumEntriesBytes()); + } + return sizeof(uint64_t) * getPreambleLongs(false) + + sizeof(uint64_t) * entries_.size(); +} + +// store num_entries as whole bytes since whole-byte blocks will follow (most +// probably) +template +uint8_t CompactThetaSketchAlloc::getNumEntriesBytes() const { + return wholeBytesToHoldBits( + 32 - countLeadingZerosInU32(static_cast(entries_.size()))); +} + +template +size_t CompactThetaSketchAlloc::getCompressedSerializedSizeBytes( + uint8_t entry_bits, + uint8_t num_entries_bytes) const { + const size_t compressed_bits = entry_bits * entries_.size(); + return sizeof(uint64_t) * getPreambleLongs(true) + num_entries_bytes + + wholeBytesToHoldBits(compressed_bits); +} + +template +void CompactThetaSketchAlloc::serialize(std::ostream& os) const { + const uint8_t preamble_longs = this->isEstimationMode() ? 3 + : this->isEmpty() || entries_.size() == 1 ? 1 + : 2; + write(os, preamble_longs); + write(os, UNCOMPRESSED_SERIAL_VERSION); + write(os, SKETCH_TYPE); + write(os, 0); // unused + const uint8_t flags_byte( + (1 << flags::IS_COMPACT) | (1 << flags::IS_READ_ONLY) | + (this->isEmpty() ? 1 << flags::IS_EMPTY : 0) | + (this->isOrdered() ? 
1 << flags::IS_ORDERED : 0)); + write(os, flags_byte); + write(os, getSeedHash()); + if (preamble_longs > 1) { + write(os, static_cast(entries_.size())); + write(os, 0); // unused + } + if (this->isEstimationMode()) + write(os, this->theta_); + if (entries_.size() > 0) + write(os, entries_.data(), entries_.size() * sizeof(uint64_t)); +} + +template +auto CompactThetaSketchAlloc::serialize(unsigned header_size_bytes) const + -> vector_bytes { + const size_t size = getSerializedSizeBytes() + header_size_bytes; + vector_bytes bytes(size, 0, entries_.get_allocator()); + uint8_t* ptr = bytes.data() + header_size_bytes; + const uint8_t preamble_longs = getPreambleLongs(false); + *ptr++ = preamble_longs; + *ptr++ = UNCOMPRESSED_SERIAL_VERSION; + *ptr++ = SKETCH_TYPE; + ptr += sizeof(uint16_t); // unused + const uint8_t flags_byte( + (1 << flags::IS_COMPACT) | (1 << flags::IS_READ_ONLY) | + (this->isEmpty() ? 1 << flags::IS_EMPTY : 0) | + (this->isOrdered() ? 1 << flags::IS_ORDERED : 0)); + *ptr++ = flags_byte; + ptr += copyToMem(getSeedHash(), ptr); + if (preamble_longs > 1) { + ptr += copyToMem(static_cast(entries_.size()), ptr); + ptr += sizeof(uint32_t); // unused + } + if (this->isEstimationMode()) + ptr += copyToMem(theta_, ptr); + if (entries_.size() > 0) + ptr += copyToMem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t)); + return bytes; +} + +template +bool CompactThetaSketchAlloc::isSuitableForCompression() const { + if (!this->isOrdered() || entries_.size() == 0 || + (entries_.size() == 1 && !this->isEstimationMode())) + return false; + return true; +} + +template +void CompactThetaSketchAlloc::serializeCompressed(std::ostream& os) const { + if (isSuitableForCompression()) + return serializeVersion4(os); + return serialize(os); +} + +template +auto CompactThetaSketchAlloc::serializeCompressed( + unsigned header_size_bytes) const -> vector_bytes { + if (isSuitableForCompression()) + return serializeVersion4(header_size_bytes); + return 
serialize(header_size_bytes); +} + +template +uint8_t CompactThetaSketchAlloc::computeEntryBits() const { + // compression is based on leading zeros in deltas between ordered hash values + // assumes ordered sketch + uint64_t previous = 0; + uint64_t ored = 0; + for (const uint64_t entry : entries_) { + const uint64_t delta = entry - previous; + ored |= delta; + previous = entry; + } + return 64 - countLeadingZerosInU64(ored); +} + +template +void CompactThetaSketchAlloc::serializeVersion4(std::ostream& os) const { + const uint8_t preamble_longs = this->isEstimationMode() ? 2 : 1; + const uint8_t entry_bits = computeEntryBits(); + const uint8_t num_entries_bytes = getNumEntriesBytes(); + + write(os, preamble_longs); + write(os, COMPRESSED_SERIAL_VERSION); + write(os, SKETCH_TYPE); + write(os, entry_bits); + write(os, num_entries_bytes); + const uint8_t flags_byte( + (1 << flags::IS_COMPACT) | (1 << flags::IS_READ_ONLY) | + (1 << flags::IS_ORDERED)); + write(os, flags_byte); + write(os, getSeedHash()); + if (this->isEstimationMode()) + write(os, this->theta_); + uint32_t num_entries = static_cast(entries_.size()); + for (unsigned i = 0; i < num_entries_bytes; ++i) { + write(os, num_entries & 0xff); + num_entries >>= 8; + } + + uint64_t previous = 0; + uint64_t deltas[8]; + vector_bytes buffer( + entry_bits, + 0, + entries_.get_allocator()); // block of 8 entries takes entry_bits bytes + + // pack blocks of 8 deltas + unsigned i; + for (i = 0; i + 7 < entries_.size(); i += 8) { + for (unsigned j = 0; j < 8; ++j) { + deltas[j] = entries_[i + j] - previous; + previous = entries_[i + j]; + } + packBitsBlock8(deltas, buffer.data(), entry_bits); + write(os, buffer.data(), buffer.size()); + } + + // pack extra deltas if fewer than 8 of them left + if (i < entries_.size()) { + uint8_t offset = 0; + uint8_t* ptr = buffer.data(); + for (; i < entries_.size(); ++i) { + const uint64_t delta = entries_[i] - previous; + previous = entries_[i]; + offset = packBits(delta, 
entry_bits, ptr, offset); + } + if (offset > 0) + ++ptr; + write(os, buffer.data(), ptr - buffer.data()); + } +} + +template +auto CompactThetaSketchAlloc::serializeVersion4( + unsigned header_size_bytes) const -> vector_bytes { + const uint8_t entry_bits = computeEntryBits(); + const uint8_t num_entries_bytes = getNumEntriesBytes(); + const size_t size = + getCompressedSerializedSizeBytes(entry_bits, num_entries_bytes) + + header_size_bytes; + vector_bytes bytes(size, 0, entries_.get_allocator()); + uint8_t* ptr = bytes.data() + header_size_bytes; + + *ptr++ = getPreambleLongs(true); + *ptr++ = COMPRESSED_SERIAL_VERSION; + *ptr++ = SKETCH_TYPE; + *ptr++ = entry_bits; + *ptr++ = num_entries_bytes; + const uint8_t flags_byte( + (1 << flags::IS_COMPACT) | (1 << flags::IS_READ_ONLY) | + (1 << flags::IS_ORDERED)); + *ptr++ = flags_byte; + ptr += copyToMem(getSeedHash(), ptr); + if (this->isEstimationMode()) { + ptr += copyToMem(theta_, ptr); + } + uint32_t num_entries = static_cast(entries_.size()); + for (unsigned i = 0; i < num_entries_bytes; ++i) { + *ptr++ = num_entries & 0xff; + num_entries >>= 8; + } + + uint64_t previous = 0; + uint64_t deltas[8]; + + // pack blocks of 8 deltas + unsigned i; + for (i = 0; i + 7 < entries_.size(); i += 8) { + for (unsigned j = 0; j < 8; ++j) { + deltas[j] = entries_[i + j] - previous; + previous = entries_[i + j]; + } + packBitsBlock8(deltas, ptr, entry_bits); + ptr += entry_bits; + } + + // pack extra deltas if fewer than 8 of them left + uint8_t offset = 0; + for (; i < entries_.size(); ++i) { + const uint64_t delta = entries_[i] - previous; + previous = entries_[i]; + offset = packBits(delta, entry_bits, ptr, offset); + } + return bytes; +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserialize( + std::istream& is, + uint64_t seed, + const A& allocator) { + const auto preamble_longs = read(is); + const auto serial_version = read(is); + const auto type = read(is); + checker::checkSketchType(type, 
SKETCH_TYPE); + switch (serial_version) { + case 4: + return deserializeV4(preamble_longs, is, seed, allocator); + case 3: + return deserializeV3(preamble_longs, is, seed, allocator); + case 1: + return deserializeV1(preamble_longs, is, seed, allocator); + case 2: + return deserializeV2(preamble_longs, is, seed, allocator); + default: + throw VeloxUserError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "unexpected sketch serialization version " + + std::to_string(serial_version), + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserializeV1( + uint8_t, + std::istream& is, + uint64_t seed, + const A& allocator) { + const auto seed_hash = compute_seed_hash(seed); + read(is); // unused + read(is); // unused + const auto num_entries = read(is); + read(is); // unused + const auto theta = read(is); + std::vector entries(num_entries, 0, allocator); + bool isEmpty = (num_entries == 0) && (theta == ThetaConstants::MAX_THETA); + if (!isEmpty) + read(is, entries.data(), sizeof(uint64_t) * entries.size()); + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + return CompactThetaSketchAlloc( + isEmpty, true, seed_hash, theta, std::move(entries)); +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserializeV2( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const A& allocator) { + read(is); // unused + read(is); // unused + const uint16_t seed_hash = read(is); + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + if (preamble_longs == 1) { + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false 
/*retriable*/); + } + std::vector entries(0, 0, allocator); + return CompactThetaSketchAlloc( + true, true, seed_hash, ThetaConstants::MAX_THETA, std::move(entries)); + } else if (preamble_longs == 2) { + const uint32_t num_entries = read(is); + read(is); // unused + std::vector entries(num_entries, 0, allocator); + if (num_entries == 0) { + return CompactThetaSketchAlloc( + true, true, seed_hash, ThetaConstants::MAX_THETA, std::move(entries)); + } + read(is, entries.data(), entries.size() * sizeof(uint64_t)); + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + return CompactThetaSketchAlloc( + false, true, seed_hash, ThetaConstants::MAX_THETA, std::move(entries)); + } else if (preamble_longs == 3) { + const uint32_t num_entries = read(is); + read(is); // unused + const auto theta = read(is); + bool isEmpty = (num_entries == 0) && (theta == ThetaConstants::MAX_THETA); + std::vector entries(num_entries, 0, allocator); + if (isEmpty) { + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + return CompactThetaSketchAlloc( + true, true, seed_hash, theta, std::move(entries)); + } else { + read(is, entries.data(), sizeof(uint64_t) * entries.size()); + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + return CompactThetaSketchAlloc( + false, true, seed_hash, theta, std::move(entries)); + } + } else { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + std::to_string(preamble_longs) + + " longs of premable, but expected 1, 2, or 3", 
+ error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserializeV3( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const A& allocator) { + read(is); // unused + const auto flags_byte = read(is); + const auto seed_hash = read(is); + const bool isEmpty = flags_byte & (1 << flags::IS_EMPTY); + if (!isEmpty) + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + uint64_t theta = ThetaConstants::MAX_THETA; + uint32_t num_entries = 0; + if (!isEmpty) { + if (preamble_longs == 1) { + num_entries = 1; + } else { + num_entries = read(is); + read(is); // unused + if (preamble_longs > 2) + theta = read(is); + } + } + std::vector entries(num_entries, 0, allocator); + if (!isEmpty) + read(is, entries.data(), sizeof(uint64_t) * entries.size()); + const bool isOrdered = flags_byte & (1 << flags::IS_ORDERED); + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + return CompactThetaSketchAlloc( + isEmpty, isOrdered, seed_hash, theta, std::move(entries)); +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserializeV4( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const A& allocator) { + const auto entry_bits = read(is); + const auto num_entries_bytes = read(is); + const auto flags_byte = read(is); + const auto seed_hash = read(is); + const bool isEmpty = flags_byte & (1 << flags::IS_EMPTY); + if (!isEmpty) + checker::checkSeedHash(seed_hash, compute_seed_hash(seed)); + uint64_t theta = ThetaConstants::MAX_THETA; + if (preamble_longs > 1) + theta = read(is); + uint32_t num_entries = 0; + for (unsigned i = 0; i < num_entries_bytes; ++i) { + num_entries |= read(is) << (i << 3); + } + vector_bytes buffer( + entry_bits, 0, allocator); // block of 8 
entries takes entry_bits bytes + std::vector entries(num_entries, 0, allocator); + + // unpack blocks of 8 deltas + unsigned i; + for (i = 0; i + 7 < num_entries; i += 8) { + read(is, buffer.data(), buffer.size()); + unpackBitsBlock8(&entries[i], buffer.data(), entry_bits); + } + // unpack extra deltas if fewer than 8 of them left + if (i < num_entries) + read( + is, + buffer.data(), + wholeBytesToHoldBits((num_entries - i) * entry_bits)); + if (!is.good()) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "error reading from std::istream", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + const uint8_t* ptr = buffer.data(); + uint8_t offset = 0; + for (; i < num_entries; ++i) { + offset = unpackBits(entries[i], entry_bits, ptr, offset); + } + // undo deltas + uint64_t previous = 0; + for (i = 0; i < num_entries; ++i) { + entries[i] += previous; + previous = entries[i]; + } + const bool isOrdered = flags_byte & (1 << flags::IS_ORDERED); + return CompactThetaSketchAlloc( + isEmpty, isOrdered, seed_hash, theta, std::move(entries)); +} + +template +CompactThetaSketchAlloc CompactThetaSketchAlloc::deserialize( + const void* bytes, + size_t size, + uint64_t seed, + const A& allocator) { + auto data = CompactThetaSketchParser::parse(bytes, size, seed, false); + if (data.entryBits == 64) { // versions 1 to 3 + const uint64_t* entries = + reinterpret_cast(data.entriesStartPtr); + return CompactThetaSketchAlloc( + data.isEmpty, + data.isOrdered, + data.seedHash, + data.theta, + std::vector( + entries, entries + data.numEntries, allocator)); + } else { // version 4 + std::vector entries(data.numEntries, 0, allocator); + const uint8_t* ptr = reinterpret_cast(data.entriesStartPtr); + // unpack blocks of 8 deltas + unsigned i; + for (i = 0; i + 7 < data.numEntries; i += 8) { + unpackBitsBlock8(&entries[i], ptr, data.entryBits); + ptr += data.entryBits; + } + // unpack extra deltas if fewer than 8 of 
them left + uint8_t offset = 0; + for (; i < data.numEntries; ++i) { + offset = unpackBits(entries[i], data.entryBits, ptr, offset); + } + // undo deltas + uint64_t previous = 0; + for (i = 0; i < data.numEntries; ++i) { + entries[i] += previous; + previous = entries[i]; + } + return CompactThetaSketchAlloc( + data.isEmpty, + data.isOrdered, + data.seedHash, + data.theta, + std::move(entries)); + } +} + +// wrapped compact sketch + +template +WrappedCompactThetaSketchAlloc::WrappedCompactThetaSketchAlloc( + const data_type& data) + : data_(data) {} + +template +const WrappedCompactThetaSketchAlloc WrappedCompactThetaSketchAlloc::wrap( + const void* bytes, + size_t size, + uint64_t seed, + bool dump_on_error) { + return WrappedCompactThetaSketchAlloc( + CompactThetaSketchParser::parse(bytes, size, seed, dump_on_error)); +} + +template +A WrappedCompactThetaSketchAlloc::getAllocator() const { + return A(); +} + +template +bool WrappedCompactThetaSketchAlloc::isEmpty() const { + return data_.isEmpty; +} + +template +bool WrappedCompactThetaSketchAlloc::isOrdered() const { + return data_.isOrdered; +} + +template +uint64_t WrappedCompactThetaSketchAlloc::getTheta64() const { + return data_.theta; +} + +template +uint32_t WrappedCompactThetaSketchAlloc::getNumRetained() const { + return data_.numEntries; +} + +template +uint16_t WrappedCompactThetaSketchAlloc::getSeedHash() const { + return data_.seedHash; +} + +template +auto WrappedCompactThetaSketchAlloc::begin() const -> const_iterator { + return const_iterator( + data_.entriesStartPtr, data_.entryBits, data_.numEntries, 0); +} + +template +auto WrappedCompactThetaSketchAlloc::end() const -> const_iterator { + return const_iterator( + data_.entriesStartPtr, + data_.entryBits, + data_.numEntries, + data_.numEntries); +} + +template +void WrappedCompactThetaSketchAlloc::printSpecifics( + std::ostringstream&) const {} + +template +void WrappedCompactThetaSketchAlloc::printItems( + std::ostringstream& os) const { + os 
<< "### Retained entries" << std::endl; + for (const auto hash : *this) { + os << hash << std::endl; + } + os << "### End retained entries" << std::endl; +} + +// assumes index == 0 or index == num_entries +template +WrappedCompactThetaSketchAlloc::const_iterator::const_iterator( + const void* ptr, + uint8_t entry_bits, + uint32_t num_entries, + uint32_t index) + : ptr_(ptr), + entry_bits_(entry_bits), + num_entries_(num_entries), + index_(index), + previous_(0), + is_block_mode_(num_entries_ >= 8), + offset_(0) { + if (entry_bits == 64) { // no compression + ptr_ = reinterpret_cast(ptr) + index; + } else if (index < num_entries) { + if (is_block_mode_) { + unpack8(); + } else { + unpack1(); + } + } +} + +template +auto WrappedCompactThetaSketchAlloc::const_iterator::operator++() + -> const_iterator& { + if (entry_bits_ == 64) { // no compression + ptr_ = reinterpret_cast(ptr_) + 1; + return *this; + } + if (++index_ < num_entries_) { + if (is_block_mode_) { + if ((index_ & 7) == 0) { + if (num_entries_ - index_ >= 8) { + unpack8(); + } else { + is_block_mode_ = false; + unpack1(); + } + } + } else { + unpack1(); + } + } + return *this; +} + +template +void WrappedCompactThetaSketchAlloc::const_iterator::unpack1() { + const uint32_t i = index_ & 7; + offset_ = unpackBits( + buffer_[i], + entry_bits_, + reinterpret_cast(ptr_), + offset_); + buffer_[i] += previous_; + previous_ = buffer_[i]; +} + +template +void WrappedCompactThetaSketchAlloc::const_iterator::unpack8() { + unpackBitsBlock8( + buffer_, reinterpret_cast(ptr_), entry_bits_); + ptr_ = reinterpret_cast(ptr_) + entry_bits_; + for (int i = 0; i < 8; ++i) { + buffer_[i] += previous_; + previous_ = buffer_[i]; + } +} + +template +auto WrappedCompactThetaSketchAlloc::const_iterator::operator++(int) + -> const_iterator { + const_iterator tmp(*this); + operator++(); + return tmp; +} + +template +bool WrappedCompactThetaSketchAlloc::const_iterator::operator!=( + const const_iterator& other) const { + if 
(entry_bits_ == 64) + return ptr_ != other.ptr_; + return index_ != other.index_; +} + +template +bool WrappedCompactThetaSketchAlloc::const_iterator::operator==( + const const_iterator& other) const { + if (entry_bits_ == 64) + return ptr_ == other.ptr_; + return index_ == other.index_; +} + +template +auto WrappedCompactThetaSketchAlloc::const_iterator::operator*() + const -> reference { + if (entry_bits_ == 64) + return *reinterpret_cast(ptr_); + return buffer_[index_ & 7]; +} + +template +auto WrappedCompactThetaSketchAlloc::const_iterator::operator->() + const -> pointer { + if (entry_bits_ == 64) + return reinterpret_cast(ptr_); + return buffer_ + (index_ & 7); +} + +} // namespace facebook::velox::common::theta + +#endif diff --git a/velox/external/theta/ThetaSketch.h b/velox/external/theta/ThetaSketch.h new file mode 100644 index 00000000000..ff31a2c064e --- /dev/null +++ b/velox/external/theta/ThetaSketch.h @@ -0,0 +1,699 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "CompactThetaSketchParser.h" +#include "ThetaUpdateSketchBase.h" + +namespace facebook::velox::common::theta { + +// forward declarations +template +class ThetaSketchAlloc; +template +class UpdateThetaSketchAlloc; +template +class CompactThetaSketchAlloc; +template +class WrappedCompactThetaSketchAlloc; + +/// Theta sketch alias with default allocator +using thetaSketch = ThetaSketchAlloc>; +/// Update Theta sketch alias with default allocator +using updateThetaSketch = UpdateThetaSketchAlloc>; +/// Compact Theta sketch alias with default allocator +using compactThetaSketch = CompactThetaSketchAlloc>; +/// Wrapped Compact Theta sketch alias with default allocator +using wrappedCompactThetaSketch = + WrappedCompactThetaSketchAlloc>; + +/// Abstract base class for Theta sketch +template > +class BaseThetaSketchAlloc { + public: + virtual ~BaseThetaSketchAlloc() = default; + + /** + * @return allocator + */ + virtual Allocator getAllocator() const = 0; + + /** + * @return true if this sketch represents an empty set (not the same as no + * retained entries!) + */ + virtual bool isEmpty() const = 0; + + /** + * @return estimate of the distinct count of the input stream + */ + double getEstimate() const; + + /** + * Returns the approximate lower error bound given a number of standard + * deviations. This parameter is similar to the number of standard deviations + * of the normal distribution and corresponds to approximately 68%, 95% and + * 99.7% confidence intervals. + * @param num_std_devs number of Standard Deviations (1, 2 or 3) + * @return the lower bound + */ + double getLowerBound(uint8_t num_std_devs) const; + + /** + * Returns the approximate upper error bound given a number of standard + * deviations. This parameter is similar to the number of standard deviations + * of the normal distribution and corresponds to approximately 68%, 95% and + * 99.7% confidence intervals.
+ * @param num_std_devs number of Standard Deviations (1, 2 or 3) + * @return the upper bound + */ + double getUpperBound(uint8_t num_std_devs) const; + + /** + * @return true if the sketch is in estimation mode (as opposed to exact mode) + */ + bool isEstimationMode() const; + + /** + * @return theta as a fraction from 0 to 1 (effective sampling rate) + */ + double getTheta() const; + + /** + * @return theta as a positive integer between 0 and LLONG_MAX + */ + virtual uint64_t getTheta64() const = 0; + + /** + * @return the number of retained entries in the sketch + */ + virtual uint32_t getNumRetained() const = 0; + + /** + * @return hash of the seed that was used to hash the input + */ + virtual uint16_t getSeedHash() const = 0; + + /** + * @return true if retained entries are ordered + */ + virtual bool isOrdered() const = 0; + + /** + * Provides a human-readable summary of this sketch as a string + * @param print_items if true include the list of items retained by the sketch + * @return sketch summary as a string + */ + virtual string toString(bool print_items = false) const; + + protected: + virtual void printSpecifics(std::ostringstream& os) const = 0; + virtual void printItems(std::ostringstream& os) const = 0; +}; + +/// Base class for the Theta Sketch, a generalization of the Kth Minimum Value +/// (KMV) sketch. +template > +class ThetaSketchAlloc : public BaseThetaSketchAlloc { + public: + using Entry = uint64_t; + using ExtractKey = trivialExtractKey; + using iterator = ThetaIterator; + using const_iterator = ThetaConstIterator; + + virtual ~ThetaSketchAlloc() = default; + + /** + * Iterator over hash values in this sketch. + * @return begin iterator + */ + virtual iterator begin() = 0; + + /** + * Iterator pointing past the valid range. + * Not to be incremented or dereferenced. + * @return end iterator + */ + virtual iterator end() = 0; + + /** + * Const iterator over hash values in this sketch. 
+ * @return begin iterator + */ + virtual const_iterator begin() const = 0; + + /** + * Const iterator pointing past the valid range. + * Not to be incremented or dereferenced. + * @return end iterator + */ + virtual const_iterator end() const = 0; + + protected: + virtual void printItems(std::ostringstream& os) const; +}; + +// forward declaration +template +class CompactThetaSketchAlloc; + +/** + * Update Theta sketch. + * The purpose of this class is to build a Theta sketch from input data via the + * update() methods. There is no constructor. Use builder instead. + */ +template > +class UpdateThetaSketchAlloc : public ThetaSketchAlloc { + public: + using Base = ThetaSketchAlloc; + using Entry = typename Base::Entry; + using ExtractKey = typename Base::ExtractKey; + using iterator = typename Base::iterator; + using const_iterator = typename Base::const_iterator; + using ThetaTable = ThetaUpdateSketchBase; + using resizeFactor = typename ThetaTable::resizeFactor; + + // No constructor here. Use builder instead. 
+ class builder; + + /** + * Copy constructor + * @param other sketch to be copied + */ + UpdateThetaSketchAlloc(const UpdateThetaSketchAlloc& other) = default; + + /** + * Move constructor + * @param other sketch to be moved + */ + UpdateThetaSketchAlloc(UpdateThetaSketchAlloc&& other) noexcept = default; + + virtual ~UpdateThetaSketchAlloc() = default; + + /** + * Copy assignment + * @param other sketch to be copied + * @return reference to this sketch + */ + UpdateThetaSketchAlloc& operator=(const UpdateThetaSketchAlloc& other) = + default; + + /** + * Move assignment + * @param other sketch to be moved + * @return reference to this sketch + */ + UpdateThetaSketchAlloc& operator=(UpdateThetaSketchAlloc&& other) = default; + + virtual Allocator getAllocator() const override; + virtual bool isEmpty() const override; + virtual bool isOrdered() const override; + virtual uint16_t getSeedHash() const override; + virtual uint64_t getTheta64() const override; + virtual uint32_t getNumRetained() const override; + + /** + * @return configured nominal number of entries in the sketch + */ + uint8_t getLgK() const; + + /** + * @return configured resize factor of the sketch + */ + resizeFactor getRf() const; + + /** + * Update this sketch with a given string. + * @param value string to update the sketch with + */ + void update(const std::string& value); + + /** + * Update this sketch with a given unsigned 64-bit integer. + * @param value uint64_t to update the sketch with + */ + void update(uint64_t value); + + /** + * Update this sketch with a given signed 64-bit integer. + * @param value int64_t to update the sketch with + */ + void update(int64_t value); + + /** + * Update this sketch with a given unsigned 32-bit integer. + * For compatibility with Java implementation. + * @param value uint32_t to update the sketch with + */ + void update(uint32_t value); + + /** + * Update this sketch with a given signed 32-bit integer. + * For compatibility with Java implementation. 
+ * @param value int32_t to update the sketch with + */ + void update(int32_t value); + + /** + * Update this sketch with a given unsigned 16-bit integer. + * For compatibility with Java implementation. + * @param value uint16_t to update the sketch with + */ + void update(uint16_t value); + + /** + * Update this sketch with a given signed 16-bit integer. + * For compatibility with Java implementation. + * @param value int16_t to update the sketch with + */ + void update(int16_t value); + + /** + * Update this sketch with a given unsigned 8-bit integer. + * For compatibility with Java implementation. + * @param value uint8_t to update the sketch with + */ + void update(uint8_t value); + + /** + * Update this sketch with a given signed 8-bit integer. + * For compatibility with Java implementation. + * @param value int8_t to update the sketch with + */ + void update(int8_t value); + + /** + * Update this sketch with a given double-precision floating point value. + * For compatibility with Java implementation. + * @param value double to update the sketch with + */ + void update(double value); + + /** + * Update this sketch with a given floating point value. + * For compatibility with Java implementation. + * @param value float to update the sketch with + */ + void update(float value); + + /** + * Update this sketch with given data of any type. + * This is a "universal" update that covers all cases above, + * but may produce different hashes. + * Be very careful to hash input values consistently using the same approach + * both over time and on different platforms + * and while passing sketches between C++ environment and Java environment. 
+ * Otherwise two sketches that should represent overlapping sets will be + * disjoint For instance, for signed 32-bit values call update(int32_t) method + * above, which does widening conversion to int64_t, if compatibility with + * Java is expected + * @param data pointer to the data + * @param length of the data in bytes + */ + void update(const void* data, size_t length); + + /** + * Remove retained entries in excess of the nominal size k (if any) + */ + void trim(); + + /** + * Reset the sketch to the initial empty state + */ + void reset(); + + /** + * Converts this sketch to a compact sketch (ordered or unordered). + * @param ordered optional flag to specify if an ordered sketch should be + * produced + * @return compact sketch + */ + CompactThetaSketchAlloc compact(bool ordered = true) const; + + virtual iterator begin() override; + virtual iterator end() override; + virtual const_iterator begin() const override; + virtual const_iterator end() const override; + + private: + ThetaTable table_; + + // for builder + UpdateThetaSketchAlloc( + uint8_t lg_cur_size, + uint8_t lg_nom_size, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const Allocator& allocator); + + virtual void printSpecifics(std::ostringstream& os) const override; +}; + +/** + * Compact Theta sketch. + * This is an immutable form of the Theta sketch, the form that can be + * serialized and deserialized. 
+ */ +template > +class CompactThetaSketchAlloc : public ThetaSketchAlloc { + public: + using Base = ThetaSketchAlloc; + using iterator = typename Base::iterator; + using const_iterator = typename Base::const_iterator; + using AllocBytes = + typename std::allocator_traits::template rebind_alloc; + using vector_bytes = std::vector; + + static const uint8_t UNCOMPRESSED_SERIAL_VERSION = 3; + static const uint8_t COMPRESSED_SERIAL_VERSION = 4; + static const uint8_t SKETCH_TYPE = 3; + + // Instances of this type can be obtained: + // - by compacting an UpdateThetaSketchAlloc + // - as a result of a set operation + // - by deserializing a previously serialized compact sketch + + /** + * Converting constructor. + * Constructs a compact sketch from any other type of Theta sketch + * @param other sketch to be constructed from + * @param ordered if true make the resulting sketch ordered + */ + template + CompactThetaSketchAlloc(const Other& other, bool ordered); + + /** + * Copy constructor + * @param other sketch to be copied + */ + CompactThetaSketchAlloc(const CompactThetaSketchAlloc& other) = default; + + /** + * Move constructor + * @param other sketch to be moved + */ + CompactThetaSketchAlloc(CompactThetaSketchAlloc&& other) noexcept = default; + + virtual ~CompactThetaSketchAlloc() = default; + + /** + * Copy assignment + * @param other sketch to be copied + * @return reference to this sketch + */ + CompactThetaSketchAlloc& operator=(const CompactThetaSketchAlloc& other) = + default; + + /** + * Move assignment + * @param other sketch to be moved + * @return reference to this sketch + */ + CompactThetaSketchAlloc& operator=(CompactThetaSketchAlloc&& other) = default; + + virtual Allocator getAllocator() const override; + virtual bool isEmpty() const override; + virtual bool isOrdered() const override; + virtual uint64_t getTheta64() const override; + virtual uint32_t getNumRetained() const override; + virtual uint16_t getSeedHash() const override; + + /** + * Computes
maximum serialized size in bytes + * @param lg_k nominal number of entries in the sketch + */ + static size_t getMaxSerializedSizeBytes(uint8_t lg_k); + + /** + * Computes size in bytes required to serialize the current state of the + * sketch. Computing compressed size is expensive. It takes iterating over all + * retained hashes, and the actual serialization will have to look at them + * again. + * @param compressed if true compressed size is returned (if applicable) + */ + size_t getSerializedSizeBytes(bool compressed = false) const; + + /** + * This method serializes the sketch into a given stream in a binary form + * @param os output stream + */ + void serialize(std::ostream& os) const; + + /** + * This method serializes the sketch as a vector of bytes. + * An optional header can be reserved in front of the sketch. + * It is an uninitialized space of a given size. + * This header is used in Datasketches PostgreSQL extension. + * @param header_size_bytes space to reserve in front of the sketch + */ + vector_bytes serialize(unsigned header_size_bytes = 0) const; + + /** + * This method serializes the sketch into a given stream in a compressed + * binary form. Compression is applied to ordered sketches except empty and + * single item. For unordered, empty and single item sketches this method is + * equivalent to serialize() + * @param os output stream + */ + void serializeCompressed(std::ostream& os) const; + + /** + * This method serializes the sketch as a vector of bytes. + * An optional header can be reserved in front of the sketch. + * It is an uninitialized space of a given size. + * This header is used in Datasketches PostgreSQL extension. + * Compression is applied to ordered sketches except empty and single item. 
+ * For unordered, empty and single item sketches this method is equivalent to + * serialize() + * @param header_size_bytes space to reserve in front of the sketch + */ + vector_bytes serializeCompressed(unsigned header_size_bytes = 0) const; + + virtual iterator begin() override; + virtual iterator end() override; + virtual const_iterator begin() const override; + virtual const_iterator end() const override; + + /** + * This method deserializes a sketch from a given stream. + * @param is input stream + * @param seed the seed for the hash function that was used to create the + * sketch + * @param allocator instance of an Allocator + * @return an instance of the sketch + */ + static CompactThetaSketchAlloc deserialize( + std::istream& is, + uint64_t seed = DEFAULT_SEED, + const Allocator& allocator = Allocator()); + + /** + * This method deserializes a sketch from a given array of bytes. + * @param bytes pointer to the array of bytes + * @param size the size of the array + * @param seed the seed for the hash function that was used to create the + * sketch + * @param allocator instance of an Allocator + * @return an instance of the sketch + */ + static CompactThetaSketchAlloc deserialize( + const void* bytes, + size_t size, + uint64_t seed = DEFAULT_SEED, + const Allocator& allocator = Allocator()); + + private: + enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED }; + + bool isEmpty_; + bool isOrdered_; + uint16_t seedHash_; + uint64_t theta_; + std::vector entries_; + + uint8_t getPreambleLongs(bool compressed) const; + bool isSuitableForCompression() const; + uint8_t computeEntryBits() const; + uint8_t getNumEntriesBytes() const; + size_t getCompressedSerializedSizeBytes( + uint8_t entry_bits, + uint8_t num_entries_bytes) const; + void serializeVersion4(std::ostream& os) const; + vector_bytes serializeVersion4(unsigned header_size_bytes = 0) const; + + static CompactThetaSketchAlloc deserializeV1( + uint8_t preamble_longs, + std::istream& 
is, + uint64_t seed, + const Allocator& allocator); + static CompactThetaSketchAlloc deserializeV2( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const Allocator& allocator); + static CompactThetaSketchAlloc deserializeV3( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const Allocator& allocator); + static CompactThetaSketchAlloc deserializeV4( + uint8_t preamble_longs, + std::istream& is, + uint64_t seed, + const Allocator& allocator); + + virtual void printSpecifics(std::ostringstream& os) const override; + + template < + typename E, + typename EK, + typename P, + typename S, + typename CS, + typename A> + friend class ThetaUnionBase; + template < + typename E, + typename EK, + typename P, + typename S, + typename CS, + typename A> + friend class ThetaIntersectionBase; + template + friend class ThetaSetDifferenceBase; + CompactThetaSketchAlloc( + bool is_empty, + bool is_ordered, + uint16_t seed_hash, + uint64_t theta, + std::vector&& entries); +}; + +/// Update Theta sketch builder +template +class UpdateThetaSketchAlloc::builder + : public ThetaBaseBuilder { + public: + /** + * Constructor + * @param allocator + */ + builder(const Allocator& allocator = Allocator()); + /// @return instance of Update Theta sketch + UpdateThetaSketchAlloc build() const; +}; + +/** + * Wrapped Compact Theta sketch. + * This is to wrap a buffer containing a serialized compact sketch and use it in + * a set operation avoiding some cost of deserialization. It does not take the + * ownership of the buffer. 
+ */ +template > +class WrappedCompactThetaSketchAlloc : public BaseThetaSketchAlloc { + public: + class const_iterator; + + virtual Allocator getAllocator() const override; + virtual bool isEmpty() const override; + virtual bool isOrdered() const override; + virtual uint64_t getTheta64() const override; + virtual uint32_t getNumRetained() const override; + virtual uint16_t getSeedHash() const override; + + /** + * Const iterator over hash values in this sketch. + * @return begin iterator + */ + const_iterator begin() const; + + /** + * Const iterator pointing past the valid range. + * Not to be incremented or dereferenced. + * @return end iterator + */ + const_iterator end() const; + + /** + * This method wraps a serialized compact sketch as an array of bytes. + * @param bytes pointer to the array of bytes + * @param size the size of the array + * @param seed the seed for the hash function that was used to create the + * sketch + * @param dump_on_error if true prints hex dump of the input + * @return an instance of the sketch + */ + static const WrappedCompactThetaSketchAlloc wrap( + const void* bytes, + size_t size, + uint64_t seed = DEFAULT_SEED, + bool dump_on_error = false); + + protected: + virtual void printSpecifics(std::ostringstream& os) const override; + virtual void printItems(std::ostringstream& os) const override; + + private: + using data_type = CompactThetaSketchParser::CompactThetaSketchData; + data_type data_; + + WrappedCompactThetaSketchAlloc(const data_type& data); +}; + +template +class WrappedCompactThetaSketchAlloc::const_iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const uint64_t; + using difference_type = void; + using pointer = value_type*; + using reference = uint64_t; + + const_iterator( + const void* ptr, + uint8_t entry_bits, + uint32_t num_entries, + uint32_t index); + const_iterator& operator++(); + const_iterator operator++(int); + bool operator==(const const_iterator& other) const; 
+ bool operator!=(const const_iterator& other) const; + reference operator*() const; + pointer operator->() const; + + private: + const void* ptr_; + uint8_t entry_bits_; + uint32_t num_entries_; + uint32_t index_; + uint64_t previous_; + bool is_block_mode_; + uint8_t offset_; + uint64_t buffer_[8]; + + inline void unpack1(); + inline void unpack8(); +}; + +} // namespace facebook::velox::common::theta + +#include "ThetaSketch.cpp" diff --git a/velox/external/theta/ThetaUnion.cpp b/velox/external/theta/ThetaUnion.cpp new file mode 100644 index 00000000000..d26dad88c75 --- /dev/null +++ b/velox/external/theta/ThetaUnion.cpp @@ -0,0 +1,79 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#ifndef THETA_UNION_CPP +#define THETA_UNION_CPP + +#include "ThetaUnion.h" + +namespace facebook::velox::common::theta { + +template +ThetaUnionAlloc::ThetaUnionAlloc( + uint8_t lgCurSize, + uint8_t lgNomSize, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const A& allocator) + : state_( + lgCurSize, + lgNomSize, + rf, + p, + theta, + seed, + nop_policy(), + allocator) {} + +template +template +void ThetaUnionAlloc::update(FwdSketch&& sketch) { + state_.update(std::forward(sketch)); +} + +template +auto ThetaUnionAlloc::getResult(bool ordered) const -> CompactSketch { + return state_.getResult(ordered); +} + +template +void ThetaUnionAlloc::reset() { + state_.reset(); +} + +template +ThetaUnionAlloc::builder::builder(const A& allocator) + : ThetaBaseBuilder(allocator) {} + +template +auto ThetaUnionAlloc::builder::build() const -> ThetaUnionAlloc { + return ThetaUnionAlloc( + this->startingLgSize(), + this->lg_k_, + this->rf_, + this->p_, + this->startingTheta(), + this->seed_, + this->allocator_); +} + +} // namespace facebook::velox::common::theta + +#endif diff --git a/velox/external/theta/ThetaUnion.h b/velox/external/theta/ThetaUnion.h new file mode 100644 index 00000000000..e9e3cb54e3d --- /dev/null +++ b/velox/external/theta/ThetaUnion.h @@ -0,0 +1,113 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "Serde.h" +#include "ThetaSketch.h" +#include "ThetaUnionBase.h" + +namespace facebook::velox::common::theta { + +// forward declaration +template +class ThetaUnionAlloc; + +// alias with default allocator for convenience +using ThetaUnion = ThetaUnionAlloc>; + +/** + * Theta Union. + * Computes union of Theta sketches. There is no constructor. Use builder + * instead. + */ +template > +class ThetaUnionAlloc { + public: + using Entry = uint64_t; + using ExtractKey = trivialExtractKey; + using Sketch = ThetaSketchAlloc; + using CompactSketch = CompactThetaSketchAlloc; + using resizeFactor = ThetaConstants::resizeFactor; + + // there is no payload in Theta sketch entry + struct nop_policy { + void operator()(uint64_t internal_entry, uint64_t incoming_entry) const { + unused(internal_entry); + unused(incoming_entry); + } + }; + using State = ThetaUnionBase< + Entry, + ExtractKey, + nop_policy, + Sketch, + CompactSketch, + Allocator>; + + // No constructor here. Use builder instead. + class builder; + + /** + * Update the union with a given sketch + * @param sketch to update the union with + */ + template + void update(FwdSketch&& sketch); + + /** + * Produces a copy of the current state of the union as a compact sketch. + * @param ordered optional flag to specify if an ordered sketch should be + * produced + * @return the result of the union + */ + CompactSketch getResult(bool ordered = true) const; + + /// Reset the union to the initial empty state + void reset(); + + private: + State state_; + + // for builder + ThetaUnionAlloc( + uint8_t lg_cur_size, + uint8_t lg_nom_size, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const Allocator& allocator); +}; + +/// Theta union builder +template +class ThetaUnionAlloc::builder : public ThetaBaseBuilder { + public: + builder(const A& allocator = A()); + + /** + * Create an instance of the union with predefined parameters. 
+ * @return an instance of the union + */ + ThetaUnionAlloc build() const; +}; + +} // namespace facebook::velox::common::theta + +#include "ThetaUnion.cpp" diff --git a/velox/external/theta/ThetaUnionBase.cpp b/velox/external/theta/ThetaUnionBase.cpp new file mode 100644 index 00000000000..4567bd46e32 --- /dev/null +++ b/velox/external/theta/ThetaUnionBase.cpp @@ -0,0 +1,168 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#ifndef THETA_UNION_BASE_CPP +#define THETA_UNION_BASE_CPP + +#include + +#include "ConditionalForward.h" +#include "ThetaUnionBase.h" +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::common::theta { + +template < + typename EN, + typename EK, + typename P, + typename S, + typename CS, + typename A> +ThetaUnionBase::ThetaUnionBase( + uint8_t lg_cur_size, + uint8_t lg_nom_size, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const P& policy, + const A& allocator) + : policy_(policy), + table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator), + union_theta_(table_.theta_) {} + +template < + typename EN, + typename EK, + typename P, + typename S, + typename CS, + typename A> +template +void ThetaUnionBase::update(SS&& sketch) { + if (sketch.isEmpty()) + return; + if (sketch.getSeedHash() != compute_seed_hash(table_.seed_)) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + 
"seed hash mismatch", + error_source::kErrorSourceRuntime, + error_code::kUnknown, + false /*retriable*/); + } + table_.isEmpty_ = false; + union_theta_ = std::min(union_theta_, sketch.getTheta64()); + for (auto&& entry : sketch) { + const uint64_t hash = EK()(entry); + if (hash < union_theta_ && hash < table_.theta_) { + auto result = table_.find(hash); + if (!result.second) { + table_.insert(result.first, conditionalForward(entry)); + } else { + policy_(*result.first, conditionalForward(entry)); + } + } else { + if (sketch.isOrdered()) + break; // early stop + } + } + union_theta_ = std::min(union_theta_, table_.theta_); +} + +template < + typename EN, + typename EK, + typename P, + typename S, + typename CS, + typename A> +CS ThetaUnionBase::getResult(bool ordered) const { + std::vector entries(table_.allocator_); + if (table_.isEmpty_) + return CS( + true, + true, + compute_seed_hash(table_.seed_), + union_theta_, + std::move(entries)); + entries.reserve(table_.numEntries_); + uint64_t theta = std::min(union_theta_, table_.theta_); + const uint32_t nominal_num = 1 << table_.lgNomSize_; + if (union_theta_ >= table_.theta_) { + std::copy_if( + table_.begin(), + table_.end(), + std::back_inserter(entries), + keyNotZero()); + } else { + std::copy_if( + table_.begin(), + table_.end(), + std::back_inserter(entries), + keyNotZeroLessThan(theta)); + } + if (entries.size() > nominal_num) { + std::nth_element( + entries.begin(), + entries.begin() + nominal_num, + entries.end(), + comparator()); + theta = EK()(entries[nominal_num]); + entries.erase(entries.begin() + nominal_num, entries.end()); + entries.shrink_to_fit(); + } + if (ordered) + std::sort(entries.begin(), entries.end(), comparator()); + return CS( + table_.isEmpty_, + ordered, + compute_seed_hash(table_.seed_), + theta, + std::move(entries)); +} + +template < + typename EN, + typename EK, + typename P, + typename S, + typename CS, + typename A> +const P& ThetaUnionBase::getPolicy() const { + return policy_; 
+} + +template < + typename EN, + typename EK, + typename P, + typename S, + typename CS, + typename A> +void ThetaUnionBase::reset() { + table_.reset(); + union_theta_ = table_.theta_; +} + +} // namespace facebook::velox::common::theta + +#endif diff --git a/velox/external/theta/ThetaUnionBase.h b/velox/external/theta/ThetaUnionBase.h new file mode 100644 index 00000000000..1d25f060191 --- /dev/null +++ b/velox/external/theta/ThetaUnionBase.h @@ -0,0 +1,65 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include "ThetaUpdateSketchBase.h" + +namespace facebook::velox::common::theta { + +template < + typename Entry, + typename ExtractKey, + typename Policy, + typename Sketch, + typename CompactSketch, + typename Allocator> +class ThetaUnionBase { + public: + using hashTable = ThetaUpdateSketchBase; + using resizeFactor = typename hashTable::resizeFactor; + using comparator = compareByKey; + + ThetaUnionBase( + uint8_t lg_cur_size, + uint8_t lg_nom_size, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const Policy& policy, + const Allocator& allocator); + + template + void update(FwdSketch&& sketch); + + CompactSketch getResult(bool ordered = true) const; + + const Policy& getPolicy() const; + + void reset(); + + private: + Policy policy_; + hashTable table_; + uint64_t union_theta_; +}; + +} // namespace facebook::velox::common::theta + +#include "ThetaUnionBase.cpp" diff --git a/velox/external/theta/ThetaUpdateSketchBase.cpp b/velox/external/theta/ThetaUpdateSketchBase.cpp new file mode 100644 index 00000000000..591c6a084a0 --- /dev/null +++ b/velox/external/theta/ThetaUpdateSketchBase.cpp @@ -0,0 +1,530 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#ifndef THETA_UPDATE_SKETCH_BASE_CPP +#define THETA_UPDATE_SKETCH_BASE_CPP + +#include +#include +#include +#include + +#include "ThetaHelpers.h" +#include "ThetaUpdateSketchBase.h" +#include "velox/common/base/Exceptions.h" + +namespace facebook::velox::common::theta { + +template +ThetaUpdateSketchBase::ThetaUpdateSketchBase( + uint8_t lgCurSize, + uint8_t lgNomSize, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const A& allocator, + bool isEmpty) + : allocator_(allocator), + isEmpty_(isEmpty), + lgCurSize_(lgCurSize), + lgNomSize_(lgNomSize), + rf_(rf), + p_(p), + numEntries_(0), + theta_(theta), + seed_(seed), + entries_(nullptr) { + if (lgCurSize > 0) { + const size_t size = 1ULL << lgCurSize; + entries_ = allocator_.allocate(size); + for (size_t i = 0; i < size; ++i) + EK()(entries_[i]) = 0; + } +} + +template +ThetaUpdateSketchBase::ThetaUpdateSketchBase( + const ThetaUpdateSketchBase& other) + : allocator_(other.allocator_), + isEmpty_(other.isEmpty_), + lgCurSize_(other.lgCurSize_), + lgNomSize_(other.lgNomSize_), + rf_(other.rf_), + p_(other.p_), + numEntries_(other.numEntries_), + theta_(other.theta_), + seed_(other.seed_), + entries_(nullptr) { + if (other.entries_ != nullptr) { + const size_t size = 1ULL << lgCurSize_; + entries_ = allocator_.allocate(size); + for (size_t i = 0; i < size; ++i) { + if (EK()(other.entries_[i]) != 0) { + new (&entries_[i]) EN(other.entries_[i]); + } else { + EK()(entries_[i]) = 0; + } + } + } +} + +template +ThetaUpdateSketchBase::ThetaUpdateSketchBase( + ThetaUpdateSketchBase&& other) noexcept + : allocator_(std::move(other.allocator_)), + isEmpty_(other.isEmpty_), + lgCurSize_(other.lgCurSize_), + lgNomSize_(other.lgNomSize_), + rf_(other.rf_), + p_(other.p_), + numEntries_(other.numEntries_), + theta_(other.theta_), + seed_(other.seed_), + entries_(other.entries_) { + other.entries_ = nullptr; +} + +template 
+ThetaUpdateSketchBase::~ThetaUpdateSketchBase() { + if (entries_ != nullptr) { + const size_t size = 1ULL << lgCurSize_; + for (size_t i = 0; i < size; ++i) { + if (EK()(entries_[i]) != 0) + entries_[i].~EN(); + } + allocator_.deallocate(entries_, size); + } +} + +template +ThetaUpdateSketchBase& ThetaUpdateSketchBase::operator=( + const ThetaUpdateSketchBase& other) { + ThetaUpdateSketchBase copy(other); + std::swap(allocator_, copy.allocator_); + std::swap(isEmpty_, copy.isEmpty_); + std::swap(lgCurSize_, copy.lgCurSize_); + std::swap(lgNomSize_, copy.lgNomSize_); + std::swap(rf_, copy.rf_); + std::swap(p_, copy.p_); + std::swap(numEntries_, copy.numEntries_); + std::swap(theta_, copy.theta_); + std::swap(seed_, copy.seed_); + std::swap(entries_, copy.entries_); + return *this; +} + +template +ThetaUpdateSketchBase& ThetaUpdateSketchBase::operator=( + ThetaUpdateSketchBase&& other) { + std::swap(allocator_, other.allocator_); + std::swap(isEmpty_, other.isEmpty_); + std::swap(lgCurSize_, other.lgCurSize_); + std::swap(lgNomSize_, other.lgNomSize_); + std::swap(rf_, other.rf_); + std::swap(p_, other.p_); + std::swap(numEntries_, other.numEntries_); + std::swap(theta_, other.theta_); + std::swap(seed_, other.seed_); + std::swap(entries_, other.entries_); + return *this; +} + +template +uint64_t ThetaUpdateSketchBase::hashAndScreen( + const void* data, + size_t length) { + isEmpty_ = false; + const uint64_t hash = computeHash(data, length, seed_); + if (hash >= theta_) + return 0; // hash == 0 is reserved to mark empty slots in the table + return hash; +} + +template +auto ThetaUpdateSketchBase::find(uint64_t key) const + -> std::pair { + return find(entries_, lgCurSize_, key); +} + +template +auto ThetaUpdateSketchBase::find( + EN* entries, + uint8_t lg_size, + uint64_t key) -> std::pair { + const uint32_t size = 1 << lg_size; + const uint32_t mask = size - 1; + const uint32_t stride = getStride(key, lg_size); + uint32_t index = static_cast(key) & mask; + // 
search for duplicate or zero + const uint32_t loop_index = index; + do { + const uint64_t probe = EK()(entries[index]); + if (probe == 0) { + return std::pair(&entries[index], false); + } else if (probe == key) { + return std::pair(&entries[index], true); + } + index = (index + stride) & mask; + } while (index != loop_index); + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "key not found and no empty slots!", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); +} + +template +template +void ThetaUpdateSketchBase::insert(iterator it, Fwd&& entry) { + new (it) EN(std::forward(entry)); + ++numEntries_; + if (numEntries_ > getCapacity(lgCurSize_, lgNomSize_)) { + if (lgCurSize_ <= lgNomSize_) { + resize(); + } else { + rebuild(); + } + } +} + +template +auto ThetaUpdateSketchBase::begin() const -> iterator { + return entries_; +} + +template +auto ThetaUpdateSketchBase::end() const -> iterator { + return entries_ + (1ULL << lgCurSize_); +} + +template +uint32_t ThetaUpdateSketchBase::getCapacity( + uint8_t lg_cur_size, + uint8_t lg_nom_size) { + const double fraction = + (lg_cur_size <= lg_nom_size) ? 
RESIZE_THRESHOLD : REBUILD_THRESHOLD; + return static_cast(std::floor(fraction * (1 << lg_cur_size))); +} + +template +uint32_t ThetaUpdateSketchBase::getStride( + uint64_t key, + uint8_t lg_size) { + // odd and independent of index assuming lg_size lowest bits of the key were + // used for the index + return (2 * static_cast((key >> lg_size) & STRIDE_MASK)) + 1; +} + +template +void ThetaUpdateSketchBase::resize() { + const size_t old_size = 1ULL << lgCurSize_; + const uint8_t lg_new_size = + std::min(lgCurSize_ + static_cast(rf_), lgNomSize_ + 1); + const size_t new_size = 1ULL << lg_new_size; + EN* new_entries = allocator_.allocate(new_size); + for (size_t i = 0; i < new_size; ++i) + EK()(new_entries[i]) = 0; + for (size_t i = 0; i < old_size; ++i) { + const uint64_t key = EK()(entries_[i]); + if (key != 0) { + // always finds an empty slot in a larger table + new (find(new_entries, lg_new_size, key).first) + EN(std::move(entries_[i])); + entries_[i].~EN(); + EK()(entries_[i]) = 0; + } + } + std::swap(entries_, new_entries); + lgCurSize_ = lg_new_size; + allocator_.deallocate(new_entries, old_size); +} + +// assumes number of entries > nominal size +template +void ThetaUpdateSketchBase::rebuild() { + const size_t size = 1ULL << lgCurSize_; + const uint32_t nominal_size = 1 << lgNomSize_; + + // empty entries have uninitialized payloads + // TODO: avoid this for empty or trivial payloads (arithmetic types) + consolidateNonEmpty(entries_, size, numEntries_); + + std::nth_element( + entries_, entries_ + nominal_size, entries_ + numEntries_, comparator()); + this->theta_ = EK()(entries_[nominal_size]); + EN* old_entries = entries_; + const size_t num_old_entries = numEntries_; + entries_ = allocator_.allocate(size); + for (size_t i = 0; i < size; ++i) + EK()(entries_[i]) = 0; + numEntries_ = nominal_size; + // relies on consolidating non-empty entries to the front + for (size_t i = 0; i < nominal_size; ++i) { + new (find(EK()(old_entries[i])).first) 
EN(std::move(old_entries[i])); + old_entries[i].~EN(); + } + for (size_t i = nominal_size; i < num_old_entries; ++i) + old_entries[i].~EN(); + allocator_.deallocate(old_entries, size); +} + +template +void ThetaUpdateSketchBase::trim() { + if (numEntries_ > static_cast(1 << lgNomSize_)) + rebuild(); +} + +template +void ThetaUpdateSketchBase::reset() { + const size_t cur_size = 1ULL << lgCurSize_; + for (size_t i = 0; i < cur_size; ++i) { + if (EK()(entries_[i]) != 0) { + entries_[i].~EN(); + EK()(entries_[i]) = 0; + } + } + const uint8_t starting_lg_size = ThetaBuildHelper::startingSubMultiple( + lgNomSize_ + 1, ThetaConstants::MIN_LG_K, static_cast(rf_)); + if (starting_lg_size != lgCurSize_) { + allocator_.deallocate(entries_, cur_size); + lgCurSize_ = starting_lg_size; + const size_t new_size = 1ULL << starting_lg_size; + entries_ = allocator_.allocate(new_size); + for (size_t i = 0; i < new_size; ++i) + EK()(entries_[i]) = 0; + } + numEntries_ = 0; + theta_ = ThetaBuildHelper::startingThetaFromP(p_); + isEmpty_ = true; +} + +template +void ThetaUpdateSketchBase::consolidateNonEmpty( + EN* entries, + size_t size, + size_t num) { + // find the first empty slot + size_t i = 0; + while (i < size) { + if (EK()(entries[i]) == 0) + break; + ++i; + } + // scan the rest and move non-empty entries to the front + for (size_t j = i + 1; j < size; ++j) { + if (EK()(entries[j]) != 0) { + new (&entries[i]) EN(std::move(entries[j])); + entries[j].~EN(); + EK()(entries[j]) = 0; + ++i; + if (i == num) + break; + } + } +} + +// builder + +template +ThetaBaseBuilder::ThetaBaseBuilder( + const Allocator& allocator) + : allocator_(allocator), + lg_k_(ThetaConstants::DEFAULT_LG_K), + rf_(ThetaConstants::DEFAULT_RESIZE_FACTOR), + p_(1), + seed_(DEFAULT_SEED) {} + +template +Derived& ThetaBaseBuilder::set_lg_k(uint8_t lg_k) { + if (lg_k < ThetaConstants::MIN_LG_K) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "lg_k must not be less than " + + 
std::to_string(ThetaConstants::MIN_LG_K) + ": " + std::to_string(lg_k), + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + if (lg_k > ThetaConstants::MAX_LG_K) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "lg_k must not be greater than " + + std::to_string(ThetaConstants::MAX_LG_K) + ": " + std::to_string(lg_k), + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + lg_k_ = lg_k; + return static_cast(*this); +} + +template +Derived& ThetaBaseBuilder::setResizeFactor( + resizeFactor rf) { + rf_ = rf; + return static_cast(*this); +} + +template +Derived& ThetaBaseBuilder::setP(float p) { + if (p <= 0 || p > 1) { + throw VeloxRuntimeError( + __FILE__, + __LINE__, + __FUNCTION__, + "", + "sampling probability must be between 0 and 1", + error_source::kErrorSourceRuntime, + error_code::kInvalidArgument, + false /*retriable*/); + } + p_ = p; + return static_cast(*this); +} + +template +Derived& ThetaBaseBuilder::setSeed(uint64_t seed) { + seed_ = seed; + return static_cast(*this); +} + +template +uint64_t ThetaBaseBuilder::startingTheta() const { + return ThetaBuildHelper::startingThetaFromP(p_); +} + +template +uint8_t ThetaBaseBuilder::startingLgSize() const { + return ThetaBuildHelper::startingSubMultiple( + lg_k_ + 1, ThetaConstants::MIN_LG_K, static_cast(rf_)); +} + +// iterator + +template +ThetaIterator::ThetaIterator( + Entry* entries, + uint32_t size, + uint32_t index) + : entries_(entries), size_(size), index_(index) { + while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) + ++index_; +} + +template +auto ThetaIterator::operator++() -> ThetaIterator& { + ++index_; + while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) + ++index_; + return *this; +} + +template +auto ThetaIterator::operator++(int) -> ThetaIterator { + ThetaIterator tmp(*this); + operator++(); + return tmp; +} + +template +bool ThetaIterator::operator!=( + 
const ThetaIterator& other) const { + return index_ != other.index_; +} + +template +bool ThetaIterator::operator==( + const ThetaIterator& other) const { + return index_ == other.index_; +} + +template +auto ThetaIterator::operator*() const -> reference { + return entries_[index_]; +} + +template +auto ThetaIterator::operator->() const -> pointer { + return entries_ + index_; +} + +// const iterator + +template +ThetaConstIterator::ThetaConstIterator( + const Entry* entries, + uint32_t size, + uint32_t index) + : entries_(entries), size_(size), index_(index) { + while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) + ++index_; +} + +template +auto ThetaConstIterator::operator++() + -> ThetaConstIterator& { + ++index_; + while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) + ++index_; + return *this; +} + +template +auto ThetaConstIterator::operator++(int) + -> ThetaConstIterator { + ThetaConstIterator tmp(*this); + operator++(); + return tmp; +} + +template +bool ThetaConstIterator::operator!=( + const ThetaConstIterator& other) const { + return index_ != other.index_; +} + +template +bool ThetaConstIterator::operator==( + const ThetaConstIterator& other) const { + return index_ == other.index_; +} + +template +auto ThetaConstIterator::operator*() const -> reference { + return entries_[index_]; +} + +template +auto ThetaConstIterator::operator->() const -> pointer { + return entries_ + index_; +} + +} // namespace facebook::velox::common::theta + +#endif diff --git a/velox/external/theta/ThetaUpdateSketchBase.h b/velox/external/theta/ThetaUpdateSketchBase.h new file mode 100644 index 00000000000..e0c98928173 --- /dev/null +++ b/velox/external/theta/ThetaUpdateSketchBase.h @@ -0,0 +1,258 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. + */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include + +#include "MurmurHash3.h" +#include "ThetaComparators.h" +#include "ThetaConstants.h" + +namespace facebook::velox::common::theta { + +template +struct ThetaUpdateSketchBase { + using resizeFactor = ThetaConstants::resizeFactor; + using comparator = compareByKey; + + ThetaUpdateSketchBase( + uint8_t lg_cur_size, + uint8_t lg_nom_size, + resizeFactor rf, + float p, + uint64_t theta, + uint64_t seed, + const Allocator& allocator, + bool is_empty = true); + ThetaUpdateSketchBase(const ThetaUpdateSketchBase& other); + ThetaUpdateSketchBase(ThetaUpdateSketchBase&& other) noexcept; + ~ThetaUpdateSketchBase(); + ThetaUpdateSketchBase& operator=(const ThetaUpdateSketchBase& other); + ThetaUpdateSketchBase& operator=(ThetaUpdateSketchBase&& other); + + using iterator = Entry*; + + inline uint64_t hashAndScreen(const void* data, size_t length); + + inline std::pair find(uint64_t key) const; + static inline std::pair + find(Entry* entries, uint8_t lg_size, uint64_t key); + + template + inline void insert(iterator it, FwdEntry&& entry); + + iterator begin() const; + iterator end() const; + + // resize threshold = 0.5 tuned for speed + static constexpr double RESIZE_THRESHOLD = 0.5; + // hash table rebuild threshold = 15/16 + static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0; + + static constexpr uint8_t STRIDE_HASH_BITS = 7; + static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1; + + Allocator allocator_; + bool isEmpty_; + uint8_t lgCurSize_; + uint8_t lgNomSize_; 
+ resizeFactor rf_; + float p_; + uint32_t numEntries_; + uint64_t theta_; + uint64_t seed_; + Entry* entries_; + + void resize(); + void rebuild(); + void trim(); + void reset(); + + static inline uint32_t getCapacity(uint8_t lg_cur_size, uint8_t lg_nom_size); + static inline uint32_t getStride(uint64_t key, uint8_t lg_size); + static void consolidateNonEmpty(Entry* entries, size_t size, size_t num); +}; + +/// Theta base builder +template +class ThetaBaseBuilder { + public: + /** + * Creates an instance of the builder with default parameters. + * @param allocator instance of an Allocator to pass to created sketches + */ + ThetaBaseBuilder(const Allocator& allocator); + + /** + * Set log2(k), where k is a nominal number of entries in the sketch + * @param lg_k base 2 logarithm of nominal number of entries + * @return this builder + */ + Derived& set_lg_k(uint8_t lg_k); + + /** + * Set resize factor for the internal hash table (defaults to 8) + * @param rf resize factor + * @return this builder + */ + Derived& setResizeFactor(resizeFactor rf); + + /** + * Set sampling probability (initial theta). The default is 1, so the sketch + * retains all entries until it reaches the limit, at which point it goes into + * the estimation mode and reduces the effective sampling probability (theta) + * as necessary. + * @param p sampling probability + * @return this builder + */ + Derived& setP(float p); + + /** + * Set the seed for the hash function. Should be used carefully if needed. + * Sketches produced with different seeds are not compatible + * and cannot be mixed in set operations. 
+ * @param seed hash seed + * @return this builder + */ + Derived& setSeed(uint64_t seed); + + protected: + Allocator allocator_; + uint8_t lg_k_; + resizeFactor rf_; + float p_; + uint64_t seed_; + + uint64_t startingTheta() const; + uint8_t startingLgSize() const; +}; + +// key extractor + +struct trivialExtractKey { + template + auto operator()(T&& entry) const -> decltype(std::forward(entry)) { + return std::forward(entry); + } +}; + +// key not zero + +template +class keyNotZero { + public: + bool operator()(const Entry& entry) const { + return ExtractKey()(entry) != 0; + } +}; + +template +class keyNotZeroLessThan { + public: + explicit keyNotZeroLessThan(const Key& key) : key(key) {} + bool operator()(const Entry& entry) const { + return ExtractKey()(entry) != 0 && ExtractKey()(entry) < this->key; + } + + private: + Key key; +}; + +// MurMur3 hash functions + +static inline uint64_t +computeHash(const void* data, size_t length, uint64_t seed) { + HashState hashes; + MurmurHash3_x64_128(data, length, seed, hashes); + return ( + hashes.h1 >> + 1); // Java implementation does unsigned shift >>> to make values positive +} + +// iterators + +template +class ThetaIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = Entry; + using difference_type = std::ptrdiff_t; + using pointer = Entry*; + using reference = Entry&; + + ThetaIterator(Entry* entries, uint32_t size, uint32_t index); + ThetaIterator& operator++(); + ThetaIterator operator++(int); + bool operator==(const ThetaIterator& other) const; + bool operator!=(const ThetaIterator& other) const; + reference operator*() const; + pointer operator->() const; + + private: + Entry* entries_; + uint32_t size_; + uint32_t index_; +}; + +template +class ThetaConstIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const Entry; + using difference_type = std::ptrdiff_t; + using pointer = const Entry*; + using reference = const Entry&; + + 
ThetaConstIterator(const Entry* entries, uint32_t size, uint32_t index); + ThetaConstIterator& operator++(); + ThetaConstIterator operator++(int); + bool operator==(const ThetaConstIterator& other) const; + bool operator!=(const ThetaConstIterator& other) const; + reference operator*() const; + pointer operator->() const; + + private: + const Entry* entries_; + uint32_t size_; + uint32_t index_; +}; + +// double value canonicalization for compatibility with Java +static inline int64_t canonical_double(double value) { + union { + int64_t long_value; + double double_value; + } long_double_union; + + if (value == 0.0) { + long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0 + } else if (std::isnan(value)) { + long_double_union.long_value = + 0x7ff8000000000000L; // canonicalize NaN using value from Java's + // Double.doubleToLongBits() + } else { + long_double_union.double_value = value; + } + return long_double_union.long_value; +} + +} // namespace facebook::velox::common::theta + +#include "ThetaUpdateSketchBase.cpp" diff --git a/velox/external/theta/tests/BitPackingTest.cpp b/velox/external/theta/tests/BitPackingTest.cpp new file mode 100644 index 00000000000..0eff4caed1e --- /dev/null +++ b/velox/external/theta/tests/BitPackingTest.cpp @@ -0,0 +1,133 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +// Adapted from Apache DataSketches + +#include "velox/external/theta/BitPacking.h" + +#include + +namespace facebook::velox::common::theta { + +// for every number of bits from 1 to 63 +// generate pseudo-random data, pack, unpack and compare + +// inverse golden ratio (0.618.. of max uint64_t) +static const uint64_t IGOLDEN64 = 0x9e3779b97f4a7c13ULL; + +TEST(BitPackingTest, packUnpackBits) { + uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value + for (int m = 0; m < 10000; ++m) { + for (uint8_t bits = 1; bits <= 63; ++bits) { + int n = 8; + const uint64_t mask = (1ULL << bits) - 1; + std::vector input(n, 0); + for (int i = 0; i < n; ++i) { + input[i] = value & mask; + value += IGOLDEN64; + } + std::vector bytes(n * sizeof(uint64_t), 0); + uint8_t offset = 0; + uint8_t* ptr = bytes.data(); + for (int i = 0; i < n; ++i) { + offset = packBits(input[i], bits, ptr, offset); + } + + std::vector output(n, 0); + offset = 0; + const uint8_t* cptr = bytes.data(); + for (int i = 0; i < n; ++i) { + offset = unpackBits(output[i], bits, cptr, offset); + } + for (int i = 0; i < n; ++i) { + EXPECT_EQ(input[i], output[i]); + } + } + } +} + +TEST(BitPackingTest, packUnpackBlocks) { + uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value + for (int n = 0; n < 10000; ++n) { + for (uint8_t bits = 1; bits <= 63; ++bits) { + const uint64_t mask = (1ULL << bits) - 1; + std::vector input(8, 0); + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += IGOLDEN64; + } + std::vector bytes(bits, 0); + packBitsBlock8(input.data(), bytes.data(), bits); + std::vector output(8, 0); + unpackBitsBlock8(output.data(), bytes.data(), bits); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(input[i], output[i]); + } + } + } +} + +TEST(BitPackingTest, packBitsUnpackBlocks) { + uint64_t value = 0; // arbitrary starting value + for (int m = 0; m < 10000; ++m) { + for (uint8_t bits = 1; bits <= 63; ++bits) { + const uint64_t mask = (1ULL << bits) - 1; + 
std::vector input(8, 0); + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += IGOLDEN64; + } + std::vector bytes(bits, 0); + uint8_t offset = 0; + uint8_t* ptr = bytes.data(); + for (int i = 0; i < 8; ++i) { + offset = packBits(input[i], bits, ptr, offset); + } + std::vector output(8, 0); + unpackBitsBlock8(output.data(), bytes.data(), bits); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(input[i], output[i]); + } + } + } +} + +TEST(BitPackingTest, packBlocksUnpackBits) { + uint64_t value = 111; // arbitrary starting value + for (int m = 0; m < 10000; ++m) { + for (uint8_t bits = 1; bits <= 63; ++bits) { + const uint64_t mask = (1ULL << bits) - 1; + std::vector input(8, 0); + for (int i = 0; i < 8; ++i) { + input[i] = value & mask; + value += IGOLDEN64; + } + std::vector bytes(bits, 0); + packBitsBlock8(input.data(), bytes.data(), bits); + std::vector output(8, 0); + uint8_t offset = 0; + const uint8_t* cptr = bytes.data(); + for (int i = 0; i < 8; ++i) { + offset = unpackBits(output[i], bits, cptr, offset); + } + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(input[i], output[i]); + } + } + } +} + +} // namespace facebook::velox::common::theta diff --git a/velox/external/theta/tests/CMakeLists.txt b/velox/external/theta/tests/CMakeLists.txt new file mode 100644 index 00000000000..431e13e1ce9 --- /dev/null +++ b/velox/external/theta/tests/CMakeLists.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +add_executable( + velox_common_theta_test + ThetaSketchTest.cpp + ThetaUnionTest.cpp + BitPackingTest.cpp + TestUtils.h) + +add_test(NAME velox_common_theta_test COMMAND velox_common_theta_test) + +target_link_libraries( + velox_common_theta_test + PRIVATE velox_common_theta GTest::gtest GTest::gtest_main) + +file(COPY test_sketch_files + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/velox/external/theta/tests/TestUtils.h b/velox/external/theta/tests/TestUtils.h new file mode 100644 index 00000000000..a9055b521ab --- /dev/null +++ b/velox/external/theta/tests/TestUtils.h @@ -0,0 +1,204 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */ + +// Adapted from Apache DataSketches + +#pragma once + +#include +#include +#include "velox/common/base/Exceptions.h" + +namespace { +class Approx { + private: + bool marginComparison(double lhs, double rhs, double margin) const { + return (lhs + margin >= rhs) && (rhs + margin >= lhs); + } + + bool equalityComparisonImpl(const double other) const { + // First try with fixed margin, then compute margin based on epsilon, scale + // and Approx's value. Thanks to Richard Harris for his help refining the + // scaled margin value + return marginComparison(m_value, other, m_margin) || + marginComparison( + m_value, + other, + m_epsilon * + (m_scale + std::fabs(std::isinf(m_value) ? 0 : m_value))); + } + // Validates the new margin (margin >= 0) + // the range is enforced with VELOX_CHECK + void setMargin(double newMargin) { + VELOX_CHECK( + newMargin >= 0, + "Invalid Approx::margin: {} Approx::Margin has to be non-negative.", + newMargin); + m_margin = newMargin; + } + + // Validates the new epsilon (0 <= epsilon <= 1) + // the range is enforced with VELOX_CHECK + void setEpsilon(double newEpsilon) { + VELOX_CHECK( + newEpsilon >= 0 && newEpsilon <= 1.0, + "Invalid Approx::epsilon: {} Approx::epsilon has to be in [0, 1]", + newEpsilon); + m_epsilon = newEpsilon; + } + + public: + explicit Approx(double value) + : m_epsilon(std::numeric_limits::epsilon() * 100), + m_margin(0.0), + m_scale(0.0), + m_value(value) {} + + static Approx custom() { + return Approx(0); + } + + Approx operator-() const { + auto temp(*this); + temp.m_value = -temp.m_value; + return temp; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + Approx operator()(T const& value) const { + Approx approx(static_cast(value)); + approx.m_epsilon = m_epsilon; + approx.m_margin = m_margin; + approx.m_scale = m_scale; + return approx; + } + + template < + typename T, + typename = typename std::enable_if< + 
std::is_constructible::value>::type> + explicit Approx(T const& value) : Approx(static_cast(value)) {} + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator==(const T& lhs, Approx const& rhs) { + auto lhs_v = static_cast(lhs); + return rhs.equalityComparisonImpl(lhs_v); + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator==(Approx const& lhs, const T& rhs) { + return operator==(rhs, lhs); + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator!=(T const& lhs, Approx const& rhs) { + return !operator==(lhs, rhs); + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator!=(Approx const& lhs, T const& rhs) { + return !operator==(rhs, lhs); + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator<=(T const& lhs, Approx const& rhs) { + return static_cast(lhs) < rhs.m_value || lhs == rhs; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator<=(Approx const& lhs, T const& rhs) { + return lhs.m_value < static_cast(rhs) || lhs == rhs; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator>=(T const& lhs, Approx const& rhs) { + return static_cast(lhs) > rhs.m_value || lhs == rhs; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + friend bool operator>=(Approx const& lhs, T const& rhs) { + return lhs.m_value > static_cast(rhs) || lhs == rhs; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + Approx& epsilon(T const& newEpsilon) 
{ + double epsilonAsDouble = static_cast(newEpsilon); + setEpsilon(epsilonAsDouble); + return *this; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + Approx& margin(T const& newMargin) { + double marginAsDouble = static_cast(newMargin); + setMargin(marginAsDouble); + return *this; + } + + template < + typename T, + typename = typename std::enable_if< + std::is_constructible::value>::type> + Approx& scale(T const& newScale) { + m_scale = static_cast(newScale); + return *this; + } + + std::string toString() const { + std::stringstream rss; + rss << "Approx( " << m_value << " )"; + return rss.str(); + } + + private: + double m_epsilon; + double m_margin; + double m_scale; + double m_value; +}; +} // namespace diff --git a/velox/external/theta/tests/ThetaSketchTest.cpp b/velox/external/theta/tests/ThetaSketchTest.cpp new file mode 100644 index 00000000000..fb5ee1fced2 --- /dev/null +++ b/velox/external/theta/tests/ThetaSketchTest.cpp @@ -0,0 +1,773 @@ +/* +* Copyright (c) Facebook, Inc. and its affiliates. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+ */
+
+// Adapted from Apache DataSketches
+
+#include "velox/external/theta/ThetaSketch.h"
+#include "TestUtils.h"
+
+// NOTE(review): the header names of the six #include lines below are missing
+// in this copy of the patch - restore them from the upstream file.
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace facebook::velox::common::theta {
+
+// Prefix prepended to every serialized-sketch fixture name opened below.
+// NOTE(review): relative path - presumably resolved against the directory the
+// test binary runs in; confirm with the build setup.
+const std::string inputPath = "test_sketch_files/";
+
+// A freshly built update sketch, and its compact form, report empty, exact
+// mode, theta 1.0, zero estimate/bounds, and ordered.
+TEST(ThetaSketch, Empty) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  EXPECT_TRUE(update_sketch.isEmpty());
+  EXPECT_FALSE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(update_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.isOrdered());
+
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  EXPECT_TRUE(compact_sketch.isEmpty());
+  EXPECT_FALSE(compact_sketch.isEstimationMode());
+  EXPECT_TRUE(compact_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(compact_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(compact_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(compact_sketch.getUpperBound(1) == 0.0);
+  EXPECT_TRUE(compact_sketch.isOrdered());
+
+  // empty is forced to be ordered
+  EXPECT_TRUE(update_sketch.compact(false).isOrdered());
+}
+
+// With p = 0.001 a single update is expected to leave nothing retained while
+// the sketch is non-empty and in estimation mode; reset() returns it to the
+// empty state.
+TEST(ThetaSketch, NonEmptyNoRetainedKeys) {
+  updateThetaSketch update_sketch =
+      updateThetaSketch::builder().setP(0.001f).build();
+  update_sketch.update(1);
+  EXPECT_TRUE(update_sketch.getNumRetained() == 0);
+  EXPECT_FALSE(update_sketch.isEmpty());
+  EXPECT_TRUE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) > 0);
+
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  EXPECT_TRUE(compact_sketch.getNumRetained() == 0);
+  EXPECT_FALSE(compact_sketch.isEmpty());
+  EXPECT_TRUE(compact_sketch.isEstimationMode());
+  EXPECT_TRUE(compact_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(compact_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(compact_sketch.getUpperBound(1) > 0);
+
+  update_sketch.reset();
+  EXPECT_TRUE(update_sketch.isEmpty());
+  EXPECT_FALSE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(update_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) == 0.0);
+}
+
+// One update with default settings: exact mode, estimate and both bounds
+// equal to 1.0, for the update sketch and for its compact form.
+TEST(ThetaSketch, SingleItem) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  update_sketch.update(1);
+  EXPECT_FALSE(update_sketch.isEmpty());
+  EXPECT_FALSE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(update_sketch.getEstimate() == 1.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 1.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) == 1.0);
+  EXPECT_TRUE(update_sketch.isOrdered()); // one item is ordered
+
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  EXPECT_FALSE(compact_sketch.isEmpty());
+  EXPECT_FALSE(compact_sketch.isEstimationMode());
+  EXPECT_TRUE(compact_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(compact_sketch.getEstimate() == 1.0);
+  EXPECT_TRUE(compact_sketch.getLowerBound(1) == 1.0);
+  EXPECT_TRUE(compact_sketch.getUpperBound(1) == 1.0);
+  EXPECT_TRUE(compact_sketch.isOrdered());
+
+  // single item is forced to be ordered
+  EXPECT_TRUE(update_sketch.compact(false).isOrdered());
+}
+
+// 2000 distinct updates are expected to stay in exact mode (theta 1.0,
+// estimate 2000.0); reset() returns the sketch to the empty state.
+TEST(ThetaSketch, ResizeExact) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  for (int i = 0; i < 2000; i++)
+    update_sketch.update(i);
+  EXPECT_FALSE(update_sketch.isEmpty());
+  EXPECT_FALSE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(update_sketch.getEstimate() == 2000.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 2000.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) == 2000.0);
+  EXPECT_FALSE(update_sketch.isOrdered());
+
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  EXPECT_FALSE(compact_sketch.isEmpty());
+  EXPECT_FALSE(compact_sketch.isEstimationMode());
+  EXPECT_TRUE(compact_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(compact_sketch.getEstimate() == 2000.0);
+  EXPECT_TRUE(compact_sketch.getLowerBound(1) == 2000.0);
+  EXPECT_TRUE(compact_sketch.getUpperBound(1) == 2000.0);
+  EXPECT_TRUE(compact_sketch.isOrdered());
+
+  update_sketch.reset();
+  EXPECT_TRUE(update_sketch.isEmpty());
+  EXPECT_FALSE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() == 1.0);
+  EXPECT_TRUE(update_sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(update_sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) == 0.0);
+  EXPECT_TRUE(update_sketch.isOrdered());
+}
+
+// 8000 distinct updates with resize factor X1: estimation mode, estimate
+// within 1% of n, bounds bracketing n, and trim() leaving exactly
+// 2^DEFAULT_LG_K retained entries.
+TEST(ThetaSketch, estimation) {
+  updateThetaSketch update_sketch =
+      updateThetaSketch::builder()
+          .setResizeFactor(updateThetaSketch::resizeFactor::X1)
+          .build();
+  const int n = 8000;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+  // std::cerr << update_sketch.to_string();
+  EXPECT_FALSE(update_sketch.isEmpty());
+  EXPECT_TRUE(update_sketch.isEstimationMode());
+  EXPECT_TRUE(update_sketch.getTheta() < 1.0);
+  EXPECT_TRUE(
+      update_sketch.getEstimate() == Approx((double)n).margin(n * 0.01));
+  EXPECT_TRUE(update_sketch.getLowerBound(1) < n);
+  EXPECT_TRUE(update_sketch.getUpperBound(1) > n);
+
+  const uint32_t k = 1 << ThetaConstants::DEFAULT_LG_K;
+  EXPECT_TRUE(update_sketch.getNumRetained() >= k);
+  update_sketch.trim();
+  EXPECT_TRUE(update_sketch.getNumRetained() == k);
+
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  EXPECT_FALSE(compact_sketch.isEmpty());
+  EXPECT_TRUE(compact_sketch.isOrdered());
+  EXPECT_TRUE(compact_sketch.isEstimationMode());
+  EXPECT_TRUE(compact_sketch.getTheta() < 1.0);
+  EXPECT_TRUE(
+      compact_sketch.getEstimate() == Approx((double)n).margin(n * 0.01));
+  EXPECT_TRUE(compact_sketch.getLowerBound(1) < n);
+  EXPECT_TRUE(compact_sketch.getUpperBound(1) > n);
+}
+
+// Stream-deserializes the "empty, v1" fixture and checks the empty-sketch
+// invariants. The stream throws on failbit/badbit, so a missing file raises
+// from open().
+TEST(ThetaSketch, DeserializeCompactV1EmptyFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
+  auto sketch = compactThetaSketch::deserialize(is);
+  EXPECT_TRUE(sketch.isEmpty());
+  EXPECT_FALSE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.getNumRetained() == 0);
+  EXPECT_TRUE(sketch.getTheta() == 1.0);
+  EXPECT_TRUE(sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(sketch.getUpperBound(1) == 0.0);
+}
+
+// Same checks as the v1 test above, against the "empty, v2" fixture.
+TEST(ThetaSketch, DeserializeCompactV2EmptyFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
+  auto sketch = compactThetaSketch::deserialize(is);
+  EXPECT_TRUE(sketch.isEmpty());
+  EXPECT_FALSE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.getNumRetained() == 0);
+  EXPECT_TRUE(sketch.getTheta() == 1.0);
+  EXPECT_TRUE(sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(sketch.getUpperBound(1) == 0.0);
+}
+
+// Stream-deserializes the "estimation, v1" fixture, checks it against fixed
+// reference values, then against a sketch built locally from 0..8191.
+TEST(ThetaSketch, DeserializeCompactV1EstimationFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
+  auto sketch = compactThetaSketch::deserialize(is);
+  EXPECT_FALSE(sketch.isEmpty());
+  EXPECT_TRUE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.isOrdered());
+  EXPECT_TRUE(sketch.getNumRetained() == 4342);
+  EXPECT_TRUE(sketch.getTheta() == Approx(0.531700444213199).margin(1e-10));
+  EXPECT_TRUE(sketch.getEstimate() == Approx(8166.25234614053).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) == Approx(7996.956955317471).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) == Approx(8339.090301078124).margin(1e-10));
+
+  // the same construction process in Java must have produced exactly the same
+  // sketch
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+  EXPECT_TRUE(sketch.getNumRetained() == update_sketch.getNumRetained());
+  EXPECT_TRUE(
+      sketch.getTheta() == Approx(update_sketch.getTheta()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getEstimate() ==
+      Approx(update_sketch.getEstimate()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(1) ==
+      Approx(update_sketch.getLowerBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(1) ==
+      Approx(update_sketch.getUpperBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) ==
+      Approx(update_sketch.getLowerBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) ==
+      Approx(update_sketch.getUpperBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(3) ==
+      Approx(update_sketch.getLowerBound(3)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(3) ==
+      Approx(update_sketch.getUpperBound(3)).margin(1e-10));
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  // the sketches are ordered, so the iteration sequence must match exactly
+  // (iter is advanced in lockstep; the retained counts were compared above)
+  auto iter = sketch.begin();
+  for (const auto& key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// Same checks as the v1 estimation test above, against the v2 fixture.
+TEST(ThetaSketch, DeserializeCompactV2EstimationFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
+  auto sketch = compactThetaSketch::deserialize(is);
+  EXPECT_FALSE(sketch.isEmpty());
+  EXPECT_TRUE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.isOrdered());
+  EXPECT_TRUE(sketch.getNumRetained() == 4342);
+  EXPECT_TRUE(sketch.getTheta() == Approx(0.531700444213199).margin(1e-10));
+  EXPECT_TRUE(sketch.getEstimate() == Approx(8166.25234614053).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) == Approx(7996.956955317471).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) == Approx(8339.090301078124).margin(1e-10));
+
+  // the same construction process in Java must have produced exactly the same
+  // sketch
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+  EXPECT_TRUE(sketch.getNumRetained() == update_sketch.getNumRetained());
+  EXPECT_TRUE(
+      sketch.getTheta() == Approx(update_sketch.getTheta()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getEstimate() ==
+      Approx(update_sketch.getEstimate()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(1) ==
+      Approx(update_sketch.getLowerBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(1) ==
+      Approx(update_sketch.getUpperBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) ==
+      Approx(update_sketch.getLowerBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) ==
+      Approx(update_sketch.getUpperBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(3) ==
+      Approx(update_sketch.getLowerBound(3)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(3) ==
+      Approx(update_sketch.getUpperBound(3)).margin(1e-10));
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = sketch.begin();
+  for (const auto& key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// Serializes one compact sketch to a stream and to a byte buffer, checks the
+// two encodings are byte-identical, and checks both deserialize to equal
+// sketches.
+// NOTE(review): the test name reads "Derialize" - presumably "Serialize".
+// NOTE(review): both static_cast expressions below have no template argument
+// in this copy of the patch - confirm against the upstream file.
+TEST(ThetaSketch, DerializeDeserializeStreamAndBytesEquivalence) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  auto compact_sketch = update_sketch.compact();
+  compact_sketch.serialize(s);
+  auto bytes = compact_sketch.serialize();
+  EXPECT_TRUE(bytes.size() == static_cast(s.tellp()));
+  EXPECT_TRUE(bytes.size() == compact_sketch.getSerializedSizeBytes());
+  for (size_t i = 0; i < bytes.size(); ++i) {
+    EXPECT_TRUE(((char*)bytes.data())[i] == (char)s.get());
+  }
+
+  s.seekg(0); // rewind
+  compactThetaSketch deserialized_sketch1 = compactThetaSketch::deserialize(s);
+  compactThetaSketch deserialized_sketch2 =
+      compactThetaSketch::deserialize(bytes.data(), bytes.size());
+  EXPECT_TRUE(bytes.size() == static_cast(s.tellg()));
+  EXPECT_TRUE(deserialized_sketch2.isEmpty() == deserialized_sketch1.isEmpty());
+  EXPECT_TRUE(
+      deserialized_sketch2.isOrdered() == deserialized_sketch1.isOrdered());
+  EXPECT_TRUE(
+      deserialized_sketch2.getNumRetained() ==
+      deserialized_sketch1.getNumRetained());
+  EXPECT_TRUE(
+      deserialized_sketch2.getTheta() == deserialized_sketch1.getTheta());
+  EXPECT_TRUE(
+      deserialized_sketch2.getEstimate() == deserialized_sketch1.getEstimate());
+  EXPECT_TRUE(
+      deserialized_sketch2.getLowerBound(1) ==
+      deserialized_sketch1.getLowerBound(1));
+  EXPECT_TRUE(
+      deserialized_sketch2.getUpperBound(1) ==
+      deserialized_sketch1.getUpperBound(1));
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = deserialized_sketch1.begin();
+  for (auto key : deserialized_sketch2) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// An empty compact sketch serializes to 8 bytes; deserializing with one byte
+// fewer must throw VeloxUserError.
+TEST(ThetaSketch, DeserializeEmptyBufferOverrun) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  auto bytes = update_sketch.compact().serialize();
+  EXPECT_TRUE(bytes.size() == 8);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), bytes.size() - 1),
+      VeloxUserError);
+}
+
+// Truncated single-item image (7 bytes, and size - 1) must throw.
+TEST(ThetaSketch, DeserializeSingleItemBufferOverrun) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  update_sketch.update(1);
+  auto bytes = update_sketch.compact().serialize();
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 7), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), bytes.size() - 1),
+      VeloxUserError);
+}
+
+// Truncated image of a 1000-item sketch (7, 8, 16 bytes, and size - 1) must
+// throw.
+TEST(ThetaSketch, DeserializeExactModeBufferOverrun) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  for (int i = 0; i < 1000; ++i)
+    update_sketch.update(i);
+  auto bytes = update_sketch.compact().serialize();
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 7), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 8), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 16), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), bytes.size() - 1),
+      VeloxUserError);
+}
+
+// Truncated image of a 10000-item sketch (7, 8, 16, 24 bytes, and size - 1)
+// must throw.
+TEST(ThetaSketch, DeserializeEstimationModeBufferOverrun) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  for (int i = 0; i < 10000; ++i)
+    update_sketch.update(i);
+  auto bytes = update_sketch.compact().serialize();
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 7), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 8), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 16), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), 24), VeloxUserError);
+  EXPECT_THROW(
+      compactThetaSketch::deserialize(bytes.data(), bytes.size() - 1),
+      VeloxUserError);
+}
+
+// compact(ordered) and the compactThetaSketch(update_sketch, ordered)
+// constructor must iterate identically; a wrapped view over the serialized
+// bytes must agree with the ordered compact sketch; wrapping with seed 0 must
+// throw.
+TEST(ThetaSketch, ConversionConstructorAndWrappedCompact) {
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+
+  // unordered
+  auto unordered_compact1 = update_sketch.compact(false);
+  compactThetaSketch unordered_compact2(update_sketch, false);
+  auto it = unordered_compact1.begin();
+  for (auto entry : unordered_compact2) {
+    EXPECT_TRUE(*it == entry);
+    ++it;
+  }
+
+  // ordered
+  auto ordered_compact1 = update_sketch.compact();
+  compactThetaSketch ordered_compact2(update_sketch, true);
+  it = ordered_compact1.begin();
+  for (auto entry : ordered_compact2) {
+    EXPECT_TRUE(*it == entry);
+    ++it;
+  }
+
+  // wrapped compact
+  auto bytes = ordered_compact1.serialize();
+  auto ordered_compact3 =
+      wrappedCompactThetaSketch::wrap(bytes.data(), bytes.size());
+  it = ordered_compact1.begin();
+  for (auto entry : ordered_compact3) {
+    EXPECT_TRUE(*it == entry);
+    ++it;
+  }
+  EXPECT_TRUE(ordered_compact3.getEstimate() == ordered_compact1.getEstimate());
+  EXPECT_TRUE(
+      ordered_compact3.getLowerBound(1) == ordered_compact1.getLowerBound(1));
+  EXPECT_TRUE(
+      ordered_compact3.getUpperBound(1) == ordered_compact1.getUpperBound(1));
+  EXPECT_TRUE(
+      ordered_compact3.isEstimationMode() ==
+      ordered_compact1.isEstimationMode());
+  EXPECT_TRUE(ordered_compact3.getTheta() == ordered_compact1.getTheta());
+
+  // seed mismatch
+  EXPECT_THROW(
+      wrappedCompactThetaSketch::wrap(bytes.data(), bytes.size(), 0),
+      VeloxUserError);
+}
+
+// Reads the "empty, v1" fixture fully into memory (opened at end to obtain
+// the size, then rewound) and checks the wrapped view reports an empty sketch.
+// NOTE(review): `std::vector buf;` has no element type in this copy of the
+// patch - confirm against the upstream file.
+TEST(ThetaSketch, WrapCompactV1EmptyFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_empty_from_java_v1.sk",
+      std::ios::binary | std::ios::ate);
+
+  std::vector buf;
+  if (is) {
+    auto size = is.tellg();
+    buf.reserve(size);
+    buf.assign(size, 0);
+    is.seekg(0, std::ios_base::beg);
+    is.read((char*)(buf.data()), buf.size());
+  }
+
+  auto sketch = wrappedCompactThetaSketch::wrap(buf.data(), buf.size());
+  EXPECT_TRUE(sketch.isEmpty());
+  EXPECT_FALSE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.getNumRetained() == 0);
+  EXPECT_TRUE(sketch.getTheta() == 1.0);
+  EXPECT_TRUE(sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(sketch.getUpperBound(1) == 0.0);
+}
+
+// Same checks as the v1 wrap test above, against the "empty, v2" fixture.
+// NOTE(review): `std::vector buf;` has no element type in this copy - confirm.
+TEST(ThetaSketch, WrapCompactV2EmptyFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_empty_from_java_v2.sk",
+      std::ios::binary | std::ios::ate);
+
+  std::vector buf;
+  if (is) {
+    auto size = is.tellg();
+    buf.reserve(size);
+    buf.assign(size, 0);
+    is.seekg(0, std::ios_base::beg);
+    is.read((char*)(buf.data()), buf.size());
+  }
+
+  auto sketch = wrappedCompactThetaSketch::wrap(buf.data(), buf.size());
+  EXPECT_TRUE(sketch.isEmpty());
+  EXPECT_FALSE(sketch.isEstimationMode());
+  EXPECT_TRUE(sketch.getNumRetained() == 0);
+  EXPECT_TRUE(sketch.getTheta() == 1.0);
+  EXPECT_TRUE(sketch.getEstimate() == 0.0);
+  EXPECT_TRUE(sketch.getLowerBound(1) == 0.0);
+  EXPECT_TRUE(sketch.getUpperBound(1) == 0.0);
+}
+
+// Wraps the in-memory "estimation, v1" fixture and checks it against fixed
+// reference values, then against a sketch built locally from 0..8191.
+// NOTE(review): `std::vector buf;` has no element type in this copy - confirm.
+TEST(ThetaSketch, WrapCompactV1EstimationFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_estimation_from_java_v1.sk",
+      std::ios::binary | std::ios::ate);
+  std::vector buf;
+  if (is) {
+    auto size = is.tellg();
+    buf.reserve(size);
+    buf.assign(size, 0);
+    is.seekg(0, std::ios_base::beg);
+    is.read((char*)(buf.data()), buf.size());
+  }
+
+  auto sketch = wrappedCompactThetaSketch::wrap(buf.data(), buf.size());
+  EXPECT_FALSE(sketch.isEmpty());
+  EXPECT_TRUE(sketch.isEstimationMode());
+  // EXPECT_TRUE(sketch.isOrdered()); // v1 may not be ordered
+  EXPECT_TRUE(sketch.getNumRetained() == 4342);
+  EXPECT_TRUE(sketch.getTheta() == Approx(0.531700444213199).margin(1e-10));
+  EXPECT_TRUE(sketch.getEstimate() == Approx(8166.25234614053).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) == Approx(7996.956955317471).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) == Approx(8339.090301078124).margin(1e-10));
+
+  // the same construction process in Java must have produced exactly the same
+  // sketch
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+  EXPECT_TRUE(sketch.getNumRetained() == update_sketch.getNumRetained());
+  EXPECT_TRUE(
+      sketch.getTheta() == Approx(update_sketch.getTheta()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getEstimate() ==
+      Approx(update_sketch.getEstimate()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(1) ==
+      Approx(update_sketch.getLowerBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(1) ==
+      Approx(update_sketch.getUpperBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) ==
+      Approx(update_sketch.getLowerBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) ==
+      Approx(update_sketch.getUpperBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(3) ==
+      Approx(update_sketch.getLowerBound(3)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(3) ==
+      Approx(update_sketch.getUpperBound(3)).margin(1e-10));
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = sketch.begin();
+  for (const auto key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// Same checks as the v1 wrap-estimation test above, against the v2 fixture.
+// NOTE(review): `std::vector buf;` has no element type in this copy - confirm.
+TEST(ThetaSketch, WrapCompactV2EstimationFromJava) {
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit);
+  is.open(
+      inputPath + "theta_compact_estimation_from_java_v2.sk",
+      std::ios::binary | std::ios::ate);
+  std::vector buf;
+  if (is) {
+    auto size = is.tellg();
+    buf.reserve(size);
+    buf.assign(size, 0);
+    is.seekg(0, std::ios_base::beg);
+    is.read((char*)(buf.data()), buf.size());
+  }
+
+  auto sketch = wrappedCompactThetaSketch::wrap(buf.data(), buf.size());
+  EXPECT_FALSE(sketch.isEmpty());
+  EXPECT_TRUE(sketch.isEstimationMode());
+  // NOTE(review): the trailing remark on the next line mentions v1 although
+  // this test reads the v2 fixture - looks carried over from the v1 test.
+  // EXPECT_TRUE(sketch.isOrdered()); // v1 may not be ordered
+  EXPECT_TRUE(sketch.getNumRetained() == 4342);
+  EXPECT_TRUE(sketch.getTheta() == Approx(0.531700444213199).margin(1e-10));
+  EXPECT_TRUE(sketch.getEstimate() == Approx(8166.25234614053).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) == Approx(7996.956955317471).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) == Approx(8339.090301078124).margin(1e-10));
+
+  // the same construction process in Java must have produced exactly the same
+  // sketch
+  updateThetaSketch update_sketch = updateThetaSketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++)
+    update_sketch.update(i);
+  EXPECT_TRUE(sketch.getNumRetained() == update_sketch.getNumRetained());
+  EXPECT_TRUE(
+      sketch.getTheta() == Approx(update_sketch.getTheta()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getEstimate() ==
+      Approx(update_sketch.getEstimate()).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(1) ==
+      Approx(update_sketch.getLowerBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(1) ==
+      Approx(update_sketch.getUpperBound(1)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(2) ==
+      Approx(update_sketch.getLowerBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(2) ==
+      Approx(update_sketch.getUpperBound(2)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getLowerBound(3) ==
+      Approx(update_sketch.getLowerBound(3)).margin(1e-10));
+  EXPECT_TRUE(
+      sketch.getUpperBound(3) ==
+      Approx(update_sketch.getUpperBound(3)).margin(1e-10));
+  compactThetaSketch compact_sketch = update_sketch.compact();
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = sketch.begin();
+  for (const auto key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// Round-trips a 10-item sketch through serializeCompressed(): byte buffer ->
+// deserialize, byte buffer -> wrap, and stream -> deserialize must each
+// reproduce the retained count, theta and entry sequence.
+// NOTE(review): static_cast below has no template argument in this copy of
+// the patch - confirm against the upstream file.
+TEST(ThetaSketch, SerializeDeserializeSmallCompressed) {
+  auto update_sketch = updateThetaSketch::builder().build();
+  for (int i = 0; i < 10; i++)
+    update_sketch.update(i);
+  auto compact_sketch = update_sketch.compact();
+
+  auto bytes = compact_sketch.serializeCompressed();
+  EXPECT_TRUE(bytes.size() == compact_sketch.getSerializedSizeBytes(true));
+  { // deserialize bytes
+    auto deserialized_sketch =
+        compactThetaSketch::deserialize(bytes.data(), bytes.size());
+    EXPECT_TRUE(
+        deserialized_sketch.getNumRetained() ==
+        compact_sketch.getNumRetained());
+    EXPECT_TRUE(deserialized_sketch.getTheta() == compact_sketch.getTheta());
+    auto iter = deserialized_sketch.begin();
+    for (const auto key : compact_sketch) {
+      EXPECT_TRUE(*iter == key);
+      ++iter;
+    }
+  }
+  { // wrap bytes
+    auto wrapped_sketch =
+        wrappedCompactThetaSketch::wrap(bytes.data(), bytes.size());
+    EXPECT_TRUE(
+        wrapped_sketch.getNumRetained() == compact_sketch.getNumRetained());
+    EXPECT_TRUE(wrapped_sketch.getTheta() == compact_sketch.getTheta());
+    auto iter = wrapped_sketch.begin();
+    for (const auto key : compact_sketch) {
+      EXPECT_TRUE(*iter == key);
+      ++iter;
+    }
+  }
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  compact_sketch.serializeCompressed(s);
+  EXPECT_TRUE(
+      static_cast(s.tellp()) ==
+      compact_sketch.getSerializedSizeBytes(true));
+  auto deserialized_sketch = compactThetaSketch::deserialize(s);
+  EXPECT_TRUE(
+      deserialized_sketch.getNumRetained() == compact_sketch.getNumRetained());
+  EXPECT_TRUE(deserialized_sketch.getTheta() == compact_sketch.getTheta());
+  auto iter = deserialized_sketch.begin();
+  for (const auto key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// Same round-trip as the small-sketch test above, with 10000 updates.
+// NOTE(review): static_cast below has no template argument in this copy of
+// the patch - confirm against the upstream file.
+TEST(ThetaSketch, SerializeDeserializeCompressed) {
+  auto update_sketch = updateThetaSketch::builder().build();
+  for (int i = 0; i < 10000; i++)
+    update_sketch.update(i);
+  auto compact_sketch = update_sketch.compact();
+
+  auto bytes = compact_sketch.serializeCompressed();
+  EXPECT_TRUE(bytes.size() == compact_sketch.getSerializedSizeBytes(true));
+  { // deserialize bytes
+    auto deserialized_sketch =
+        compactThetaSketch::deserialize(bytes.data(), bytes.size());
+    EXPECT_TRUE(
+        deserialized_sketch.getNumRetained() ==
+        compact_sketch.getNumRetained());
+    EXPECT_TRUE(deserialized_sketch.getTheta() == compact_sketch.getTheta());
+    auto iter = deserialized_sketch.begin();
+    for (const auto key : compact_sketch) {
+      EXPECT_TRUE(*iter == key);
+      ++iter;
+    }
+  }
+  { // wrap bytes
+    auto wrapped_sketch =
+        wrappedCompactThetaSketch::wrap(bytes.data(), bytes.size());
+    EXPECT_TRUE(
+        wrapped_sketch.getNumRetained() == compact_sketch.getNumRetained());
+    EXPECT_TRUE(wrapped_sketch.getTheta() == compact_sketch.getTheta());
+    auto iter = wrapped_sketch.begin();
+    for (const auto key : compact_sketch) {
+      EXPECT_TRUE(*iter == key);
+      ++iter;
+    }
+  }
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  compact_sketch.serializeCompressed(s);
+  EXPECT_TRUE(
+      static_cast(s.tellp()) ==
+      compact_sketch.getSerializedSizeBytes(true));
+  auto deserialized_sketch = compactThetaSketch::deserialize(s);
+  EXPECT_TRUE(
+      deserialized_sketch.getNumRetained() == compact_sketch.getNumRetained());
+  EXPECT_TRUE(deserialized_sketch.getTheta() == compact_sketch.getTheta());
+  auto iter = deserialized_sketch.begin();
+  for (const auto key : compact_sketch) {
+    EXPECT_TRUE(*iter == key);
+    ++iter;
+  }
+}
+
+// The sketch reaches capacity for the first time at 2 * K * 15/16,
+// but at that point it is still in exact mode, so the serialized size is not
+// the maximum (theta is not serialized in the exact mode). So we need to catch
+// the second time, but some updates will be ignored in the estimation mode, so
+// we update more than enough times keeping track of the maximum. Potentially
+// the exact number of updates to reach the peak can be figured out given this
+// particular sequence, but not assuming that might be even better (say, in case
+// we change the load factor or hash function or just out of principle not to
+// rely on implementation details too much).
+TEST(ThetaSketch, maxSerializedSize) {
+  // Two passes of 2 * K consecutive integers are fed in. The largest
+  // serialized compact image seen during the second pass must equal the
+  // bound reported by getMaxSerializedSizeBytes(lg_k).
+  const uint8_t lg_k = 10;
+  const int updatesPerPass = (1 << lg_k) * 2;
+  auto sketch = updateThetaSketch::builder().set_lg_k(lg_k).build();
+
+  // first pass: this will go over the first peak, which is not the highest
+  int next = 0;
+  while (next < updatesPerPass) {
+    sketch.update(next++);
+  }
+
+  // second pass: this will go over the second peak, keeping track of the
+  // largest serialized size observed after each update
+  size_t largest = 0;
+  for (const int stop = 2 * updatesPerPass; next < stop; ++next) {
+    sketch.update(next);
+    largest = std::max(largest, sketch.compact().serialize().size());
+  }
+  EXPECT_TRUE(largest == compactThetaSketch::getMaxSerializedSizeBytes(lg_k));
+}
+
+} // namespace facebook::velox::common::theta
diff --git a/velox/external/theta/tests/ThetaUnionTest.cpp b/velox/external/theta/tests/ThetaUnionTest.cpp
new file mode 100644
index 00000000000..5740f33abc4
--- /dev/null
+++ b/velox/external/theta/tests/ThetaUnionTest.cpp
@@ -0,0 +1,164 @@
+/*
+* Copyright (c) Facebook, Inc. and its affiliates.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+ */
+
+// Adapted from Apache DataSketches
+
+#include "velox/external/theta/ThetaUnion.h"
+#include "TestUtils.h"
+
+#include
+#include
+
+namespace facebook::velox::common::theta {
+
+// A union that has seen nothing, and one that has only merged an empty
+// sketch, both yield an empty exact-mode result with nothing retained.
+TEST(ThetaUnion, empty) {
+  updateThetaSketch emptyInput = updateThetaSketch::builder().build();
+  ThetaUnion unionOp = ThetaUnion::builder().build();
+
+  // before any update
+  compactThetaSketch result = unionOp.getResult();
+  EXPECT_TRUE(result.getNumRetained() == 0);
+  EXPECT_TRUE(result.isEmpty());
+  EXPECT_FALSE(result.isEstimationMode());
+
+  // after merging the empty sketch
+  unionOp.update(emptyInput);
+  result = unionOp.getResult();
+  EXPECT_TRUE(result.getNumRetained() == 0);
+  EXPECT_TRUE(result.isEmpty());
+  EXPECT_FALSE(result.isEstimationMode());
+}
+
+// Merging a sketch built with p = 0.001 that has seen one value: the result
+// retains nothing, is non-empty, is in estimation mode, and carries theta
+// of about 0.001.
+TEST(ThetaUnion, nonEmptyNoRetainedKeys) {
+  updateThetaSketch sampled =
+      updateThetaSketch::builder().setP(0.001f).build();
+  sampled.update(1);
+
+  ThetaUnion unionOp = ThetaUnion::builder().build();
+  unionOp.update(sampled);
+
+  compactThetaSketch result = unionOp.getResult();
+  EXPECT_TRUE(result.getNumRetained() == 0);
+  EXPECT_FALSE(result.isEmpty());
+  EXPECT_TRUE(result.isEstimationMode());
+  EXPECT_TRUE(result.getTheta() == Approx(0.001).margin(1e-10));
+}
+
+// Union of [0, 1000) and [500, 1500): exact mode with estimate 1500.0;
+// reset() brings the union back to the empty state.
+TEST(ThetaUnion, exactModeHalfOverlap) {
+  auto lower = updateThetaSketch::builder().build();
+  for (int v = 0; v < 1000; ++v) {
+    lower.update(v);
+  }
+
+  auto upper = updateThetaSketch::builder().build();
+  for (int v = 500; v < 1500; ++v) {
+    upper.update(v);
+  }
+
+  auto unionOp = ThetaUnion::builder().build();
+  unionOp.update(lower);
+  unionOp.update(upper);
+  auto merged = unionOp.getResult();
+  EXPECT_FALSE(merged.isEmpty());
+  EXPECT_FALSE(merged.isEstimationMode());
+  EXPECT_TRUE(merged.getEstimate() == 1500.0);
+
+  unionOp.reset();
+  merged = unionOp.getResult();
+  EXPECT_TRUE(merged.getNumRetained() == 0);
+  EXPECT_TRUE(merged.isEmpty());
+  EXPECT_FALSE(merged.isEstimationMode());
+}
+
+// Same ranges as exactModeHalfOverlap, but each input reaches the union as a
+// wrapped view over its serialized compact bytes.
+TEST(ThetaUnion, exactModeHalfOverlapWrappedCompact) {
+  auto lower = updateThetaSketch::builder().build();
+  for (int v = 0; v < 1000; ++v) {
+    lower.update(v);
+  }
+  auto lowerBytes = lower.compact().serialize();
+
+  auto upper = updateThetaSketch::builder().build();
+  for (int v = 500; v < 1500; ++v) {
+    upper.update(v);
+  }
+  auto upperBytes = upper.compact().serialize();
+
+  auto unionOp = ThetaUnion::builder().build();
+  unionOp.update(
+      wrappedCompactThetaSketch::wrap(lowerBytes.data(), lowerBytes.size()));
+  unionOp.update(
+      wrappedCompactThetaSketch::wrap(upperBytes.data(), upperBytes.size()));
+  compactThetaSketch merged = unionOp.getResult();
+  EXPECT_FALSE(merged.isEmpty());
+  EXPECT_FALSE(merged.isEstimationMode());
+  EXPECT_TRUE(merged.getEstimate() == 1500.0);
+}
+
+// Union of [0, 10000) and [5000, 15000): estimation mode with an estimate
+// within 1% of 15000; reset() brings the union back to the empty state.
+TEST(ThetaUnion, estimationModeHalfOverlap) {
+  auto lower = updateThetaSketch::builder().build();
+  for (int v = 0; v < 10000; ++v) {
+    lower.update(v);
+  }
+
+  auto upper = updateThetaSketch::builder().build();
+  for (int v = 5000; v < 15000; ++v) {
+    upper.update(v);
+  }
+
+  auto unionOp = ThetaUnion::builder().build();
+  unionOp.update(lower);
+  unionOp.update(upper);
+  auto merged = unionOp.getResult();
+  EXPECT_FALSE(merged.isEmpty());
+  EXPECT_TRUE(merged.isEstimationMode());
+  EXPECT_TRUE(merged.getEstimate() == Approx(15000).margin(15000 * 0.01));
+
+  unionOp.reset();
+  merged = unionOp.getResult();
+  EXPECT_TRUE(merged.getNumRetained() == 0);
+  EXPECT_TRUE(merged.isEmpty());
+  EXPECT_FALSE(merged.isEstimationMode());
+}
+
+// A union built with seed 123 must reject a non-empty sketch built with the
+// default seed.
+TEST(ThetaUnion, seedMismatch) {
+  updateThetaSketch defaultSeeded = updateThetaSketch::builder().build();
+  defaultSeeded.update(1); // non-empty should not be ignored
+  ThetaUnion seededUnion = ThetaUnion::builder().setSeed(123).build();
+  EXPECT_THROW(seededUnion.update(defaultSeeded), VeloxRuntimeError);
+}
+
+// Three lg_k = 14 sketches over nested ranges starting at 0 are merged into
+// lg_k = 16 unions in two different orders; each result's estimate must equal
+// the estimate of the largest input.
+TEST(ThetaUnion, largerK) {
+  auto small = updateThetaSketch::builder().set_lg_k(14).build();
+  for (int v = 0; v < 16384; ++v) {
+    small.update(v);
+  }
+
+  auto medium = updateThetaSketch::builder().set_lg_k(14).build();
+  for (int v = 0; v < 26384; ++v) {
+    medium.update(v);
+  }
+
+  auto large = updateThetaSketch::builder().set_lg_k(14).build();
+  for (int v = 0; v < 86384; ++v) {
+    large.update(v);
+  }
+
+  // order: medium, small, large
+  auto firstUnion = ThetaUnion::builder().set_lg_k(16).build();
+  firstUnion.update(medium);
+  firstUnion.update(small);
+  firstUnion.update(large);
+  auto firstResult = firstUnion.getResult();
+  EXPECT_TRUE(firstResult.getEstimate() == large.getEstimate());
+
+  // order: small, large, medium
+  auto secondUnion = ThetaUnion::builder().set_lg_k(16).build();
+  secondUnion.update(small);
+  secondUnion.update(large);
+  secondUnion.update(medium);
+  auto secondResult = secondUnion.getResult();
+  EXPECT_TRUE(secondResult.getEstimate() == large.getEstimate());
+}
+
+} // namespace facebook::velox::common::theta
diff --git a/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v1.sk b/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v1.sk
new file mode 100644
index 00000000000..e8d50a7204e
Binary files /dev/null and b/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v1.sk differ
diff --git a/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v2.sk b/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v2.sk
new file mode 100644
index 00000000000..bf630a7b12c
Binary files /dev/null and b/velox/external/theta/tests/test_sketch_files/theta_compact_empty_from_java_v2.sk differ
diff --git a/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v1.sk b/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v1.sk
new file mode 100644
index 00000000000..eb6b271a155
Binary files /dev/null and b/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v1.sk differ
diff --git a/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v2.sk b/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v2.sk
new file mode 100644
index
00000000000..e4952977a4a Binary files /dev/null and b/velox/external/theta/tests/test_sketch_files/theta_compact_estimation_from_java_v2.sk differ