42
42
/**
43
43
* A dictionary based on Trie data structure that maps enumerations of byte[] to
44
44
* int IDs.
45
- *
45
+ * <p>
46
46
* With Trie the memory footprint of the mapping is minimized at the cost of
47
47
* CPU, if compared to HashMap of ID Arrays. Performance test shows Trie is
48
48
* roughly 10 times slower, so a cache layer is overlaid on top of Trie and
49
49
* gracefully falls back to Trie using a weak reference.
50
- *
50
+ * <p>
51
51
* The implementation is thread-safe.
52
- *
52
+ *
53
53
* @author yangli9
54
54
*/
55
- @ SuppressWarnings ({ "rawtypes" , "unchecked" })
55
+ @ SuppressWarnings ({"rawtypes" , "unchecked" })
56
56
public class TrieDictionary <T > extends CacheDictionary <T > {
57
57
private static final long serialVersionUID = 1L ;
58
58
59
- public static final byte [] MAGIC = new byte [] { 0x54 , 0x72 , 0x69 , 0x65 , 0x44 , 0x69 , 0x63 , 0x74 }; // "TrieDict"
59
+ public static final byte [] MAGIC = new byte []{ 0x54 , 0x72 , 0x69 , 0x65 , 0x44 , 0x69 , 0x63 , 0x74 }; // "TrieDict"
60
60
public static final int MAGIC_SIZE_I = MAGIC .length ;
61
61
62
62
public static final int BIT_IS_LAST_CHILD = 0x80 ;
@@ -104,7 +104,7 @@ private void init(byte[] trieBytes) {
104
104
105
105
String converterName = headIn .readUTF ();
106
106
if (converterName .isEmpty () == false )
107
- this . bytesConvert = ClassUtil . forName (converterName , BytesConverter . class ). newInstance ( );
107
+ setConverterByName (converterName );
108
108
109
109
this .nValues = BytesUtil .readUnsigned (trieBytes , headSize + sizeChildOffset , sizeNoValuesBeneath );
110
110
this .sizeOfId = BytesUtil .sizeForValue (baseId + nValues + 1L ); // note baseId could raise 1 byte in ID space, +1 to reserve all 0xFF for NULL case
@@ -119,6 +119,10 @@ private void init(byte[] trieBytes) {
119
119
}
120
120
}
121
121
122
/**
 * Instantiates the {@code BytesConverter} implementation named by
 * {@code converterName} and stores it in {@code bytesConvert}.
 * <p>
 * The class is looked up with {@code ClassUtil.forName(converterName, BytesConverter.class)}
 * and created through its no-argument {@code newInstance()}. {@code init(byte[])}
 * calls this method only when the converter name read from the header is
 * non-empty.
 * <p>
 * NOTE(review): the method is protected, presumably so subclasses can supply
 * the converter differently -- confirm against the subclasses.
 *
 * @param converterName class name of the converter to instantiate
 *                      (presumably fully qualified -- confirm against ClassUtil.forName)
 * @throws Exception any failure raised by the class lookup or instantiation;
 *                   nothing is caught here
 */
+ protected void setConverterByName (String converterName ) throws Exception {
123
+ this .bytesConvert = ClassUtil .forName (converterName , BytesConverter .class ).newInstance ();
124
+ }
125
+
122
126
@ Override
123
127
public int getMinId () {
124
128
return baseId ;
@@ -151,19 +155,14 @@ protected int getIdFromValueBytesWithoutCache(byte[] value, int offset, int len,
151
155
152
156
/**
153
157
* returns a code point from [0, nValues), preserving order of value
154
- *
155
- * @param n
156
- * -- the offset of current node
157
- * @param inp
158
- * -- input value bytes to lookup
159
- * @param o
160
- * -- offset in the input value bytes matched so far
161
- * @param inpEnd
162
- * -- end of input
163
- * @param roundingFlag
164
- * -- =0: return -1 if not found
165
- * -- <0: return closest smaller if not found, return -1
166
- * -- >0: return closest bigger if not found, return nValues
158
+ *
159
+ * @param n -- the offset of current node
160
+ * @param inp -- input value bytes to lookup
161
+ * @param o -- offset in the input value bytes matched so far
162
+ * @param inpEnd -- end of input
163
+ * @param roundingFlag -- =0: return -1 if not found
164
+ * -- <0: return closest smaller if not found, return -1
165
+ * -- >0: return closest bigger if not found, return nValues
167
166
*/
168
167
private int lookupSeqNoFromValue (int n , byte [] inp , int o , int inpEnd , int roundingFlag ) {
169
168
if (o == inpEnd ) // special 'empty' value
@@ -257,13 +256,10 @@ protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) {
257
256
/**
258
257
* returns a code point from [0, nValues), preserving order of value, or -1
259
258
* if not found
260
- *
261
- * @param n
262
- * -- the offset of current node
263
- * @param seq
264
- * -- the code point under track
265
- * @param returnValue
266
- * -- where return value is written to
259
+ *
260
+ * @param n -- the offset of current node
261
+ * @param seq -- the code point under track
262
+ * @param returnValue -- where return value is written to
267
263
*/
268
264
private int lookupValueFromSeqNo (int n , int seq , byte [] returnValue , int offset ) {
269
265
int o = offset ;
0 commit comments