From 63bbe45d863eed0d56e3d08c35e7e007463b7420 Mon Sep 17 00:00:00 2001 From: Shawn Yang Date: Fri, 3 May 2024 00:03:22 +0800 Subject: [PATCH] feat(java): type meta encoding for java (#1556) ## What does this PR do? This PR implements type meta encoding for java proposed in #1240. The type meta encoding in xlang spec proposed in #1413 will be finished in another PR based on this PR. The spec has been updated too. Type meta header: ``` | 8 bytes meta header | meta size | variable bytes | variable bytes | variable bytes | +-------------------------------+-----------|--------------------+-------------------+----------------+ | 7 bytes hash + 1 bytes header | 1~2 bytes | current class meta | parent class meta | ... | ``` And the encoding for package/class/field names has been updated to: ``` - Package name encoding(omitted when class is registered): - encoding algorithm: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL` - Header: `6 bits size | 2 bits encoding flags`. The `6 bits size: 0~63` will be used to indicate size `0~62`, the value `63` the size need more byte to read, the encoding will encode `size - 62` as a varint next. - Class name encoding(omitted when class is registered): - encoding algorithm: `UTF8/LOWER_UPPER_DIGIT_SPECIAL/FIRST_TO_LOWER_SPECIAL/ALL_TO_LOWER_SPECIAL` - header: `6 bits size | 2 bits encoding flags`. The `6 bits size: 0~63` will be used to indicate size `1~64`, the value `63` the size need more byte to read, the encoding will encode `size - 63` as a varint next. - Field info: - header(8 bits): `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref tracking flag`. Users can use annotation to provide those info. - 2 bits field name encoding: - encoding: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL/TAG_ID` - If tag id is used, i.e. field name is written by an unsigned varint tag id. 2 bits encoding will be `11`.
- size of field name: - The `3 bits size: 0~7` will be used to indicate length `1~7`, the value `6` the size read more bytes, the encoding will encode `size - 7` as a varint next. - If encoding is `TAG_ID`, then num_bytes of field name will be used to store tag id. - Field name: If type id is set, type id will be used instead. Otherwise meta string encoding length and data will be written instead. ``` ## Meta size Before this PR: ```java class org.apache.fury.benchmark.data.MediaContent 78 class org.apache.fury.benchmark.data.Media 208 class org.apache.fury.benchmark.data.Image 114 ``` With this PR: ```java class org.apache.fury.benchmark.data.MediaContent 53 class org.apache.fury.benchmark.data.Media 114 class org.apache.fury.benchmark.data.Image 68 ``` The size of class meta is reduced by roughly a third to a half, which is a great gain. The size can be reduced further if we introduce a field name hash, but that is not related to this PR. We can discuss it in another PR. ## Related issues #1240 #203 #202 ## Does this PR introduce any user-facing change? - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change?
## Benchmark --- docs/specification/java_serialization_spec.md | 51 +-- .../fury/builder/MetaSharedCodecBuilder.java | 6 +- .../fury/builder/ObjectCodecBuilder.java | 17 +- .../java/org/apache/fury/meta/ClassDef.java | 286 ++++++++--------- .../org/apache/fury/meta/ClassDefDecoder.java | 162 ++++++++++ .../org/apache/fury/meta/ClassDefEncoder.java | 303 ++++++++++++++++++ .../java/org/apache/fury/meta/Encoders.java | 74 +++++ .../apache/fury/meta/MetaStringEncoder.java | 49 ++- .../apache/fury/reflect/ReflectionUtils.java | 43 ++- .../org/apache/fury/resolver/ClassInfo.java | 4 +- .../apache/fury/resolver/ClassResolver.java | 73 ++++- .../apache/fury/resolver/MetaStringBytes.java | 12 +- .../fury/serializer/MetaSharedSerializer.java | 32 +- .../fury/serializer/ObjectSerializer.java | 20 +- .../fury/serializer/StringSerializer.java | 2 +- .../serializer/UnexistedClassSerializers.java | 6 +- .../java/org/apache/fury/type/Descriptor.java | 39 ++- .../apache/fury/type/DescriptorGrouper.java | 16 +- .../org/apache/fury/type/GenericType.java | 4 - .../fury-core/native-image.properties | 1 + .../apache/fury/meta/ClassDefEncoderTest.java | 65 ++++ .../org/apache/fury/meta/ClassDefTest.java | 36 +-- .../fury/type/DescriptorGrouperTest.java | 7 +- 23 files changed, 1009 insertions(+), 299 deletions(-) create mode 100644 java/fury-core/src/main/java/org/apache/fury/meta/ClassDefDecoder.java create mode 100644 java/fury-core/src/main/java/org/apache/fury/meta/ClassDefEncoder.java create mode 100644 java/fury-core/src/main/java/org/apache/fury/meta/Encoders.java create mode 100644 java/fury-core/src/test/java/org/apache/fury/meta/ClassDefEncoderTest.java diff --git a/docs/specification/java_serialization_spec.md b/docs/specification/java_serialization_spec.md index 38dd53268a..813f5a673c 100644 --- a/docs/specification/java_serialization_spec.md +++ b/docs/specification/java_serialization_spec.md @@ -112,9 +112,9 @@ For Schema consistent mode, class will be encoded as an 
enumerated string by ful the meta layout for schema evolution mode: ``` -| 8 bytes meta header | variable bytes | variable bytes | variable bytes | -+-------------------------------+--------------------+-------------------+----------------+ -| 7 bytes hash + 1 bytes header | current class meta | parent class meta | ... | +| 8 bytes meta header | meta size | variable bytes | variable bytes | variable bytes | ++-------------------------------+-----------|--------------------+-------------------+----------------+ +| 7 bytes hash + 1 bytes header | 1~2 bytes | current class meta | parent class meta | ... | ``` Class meta are encoded from parent class to leaf class, only class with serializable fields will be encoded. @@ -128,8 +128,14 @@ Meta header is a 64 bits number value encoded in little endian order. class doesn't have fields to serialize, or we're in a context which serialize fields of current class only( `ObjectStreamSerializer#SlotInfo` is an example), num classes will be 1. - 5rd bit is used to indicate whether this class needs schema evolution. +- 6rd bit is used to indicate whether the size sum of all layers meta is less than 256. - Other 56 bits is used to store the unique hash of `flags + all layers class meta`. +### Meta size + +- If the size sum of all layers meta is less than 256, then one byte is written next to indicate the length of meta. +- Otherwise, write size as two bytes in little endian. + ### Single layer class meta ``` @@ -150,34 +156,33 @@ Meta header is a 64 bits number value encoded in little endian order. fields info of those fields which aren't annotated by tag id for deserializing schema consistent fields, then use fields info in meta for deserializing compatible fields. - Package name encoding(omitted when class is registered): - - Header: - - If meta string encoding is `LOWER_SPECIAL` and the length of encoded string `<=` 128, then header will be - `7 bits size + flag(set)`. 
- Otherwise, header will be `4 bits unset + 3 bits encoding flags + flag(unset)` - - Package name: - - If bit flag is set, then package name will be encoded meta string binary. - - Otherwise, it will be `| unsigned varint length | encoded meta string binary |` -- Class name encoding(omitted when class is registered):: - - header: - - If meta string encoding is in `LOWER_SPECIAL~LOWER_UPPER_DIGIT_SPECIAL (0~3)`, and the length of encoded - string `<=` 32, then the header will be `5 bits size + 2 bits encoding flags + flag(set)`. - - Otherwise, header will be `| unsigned varint length | encoded meta string binary |` + - encoding algorithm: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL` + - Header: `6 bits size | 2 bits encoding flags`. The `6 bits size: 0~63` will be used to indicate size `0~62`, + the value `63` the size need more byte to read, the encoding will encode `size - 62` as a varint next. +- Class name encoding(omitted when class is registered): + - encoding algorithm: `UTF8/LOWER_UPPER_DIGIT_SPECIAL/FIRST_TO_LOWER_SPECIAL/ALL_TO_LOWER_SPECIAL` + - header: `6 bits size | 2 bits encoding flags`. The `6 bits size: 0~63` will be used to indicate size `1~64`, + the value `63` the size need more byte to read, the encoding will encode `size - 63` as a varint next. - Field info: - header(8 - bits): `reserved 1 bit + 3 bits field name encoding + polymorphism flag + nullability flag + ref tracking flag + tag id flag`. + bits): `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref tracking flag`. Users can use annotation to provide those info. - - tag id: when set to 1, field name will be written by an unsigned varint tag id. - - ref tracking: when set to 0, ref tracking will be disabled for this field. - - nullability: when set to 0, this field won't be null. + - 2 bits field name encoding: + - encoding: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL/TAG_ID` + - If tag id is used, i.e. 
field name is written by an unsigned varint tag id. 2 bits encoding will be `11`. + - size of field name: + - The `3 bits size: 0~7` will be used to indicate length `1~7`, the value `6` the size read more bytes, + the encoding will encode `size - 7` as a varint next. + - If encoding is `TAG_ID`, then num_bytes of field name will be used to store tag id. + - ref tracking: when set to 1, ref tracking will be enabled for this field. + - nullability: when set to 1, this field can be null. - polymorphism: when set to 1, the actual type of field will be the declared field type even the type if not `final`. - - 3 bits field name encoding will be set to meta string encoding flags when tag id is not set. - type id: - For registered type-consistent classes, it will be the registered class id. - Otherwise it will be encoded as `OBJECT_ID` if it isn't `final` and `FINAL_OBJECT_ID` if it's `final`. The - meta - for such types is written separately instead of inlining here is to reduce meta space cost if object of this - type is serialized in current object graph multiple times, and the field value may be null too. + meta for such types is written separately instead of inlining here is to reduce meta space cost if object of + this type is serialized in current object graph multiple times, and the field value may be null too. - Field name: If type id is set, type id will be used instead. Otherwise meta string encoding length and data will be written instead. 
diff --git a/java/fury-core/src/main/java/org/apache/fury/builder/MetaSharedCodecBuilder.java b/java/fury-core/src/main/java/org/apache/fury/builder/MetaSharedCodecBuilder.java index 074c7c2ca5..580f22842d 100644 --- a/java/fury-core/src/main/java/org/apache/fury/builder/MetaSharedCodecBuilder.java +++ b/java/fury-core/src/main/java/org/apache/fury/builder/MetaSharedCodecBuilder.java @@ -76,7 +76,11 @@ public MetaSharedCodecBuilder(TypeRef beanType, Fury fury, ClassDef classDef) f -> MetaSharedSerializer.consolidateFields(f.getClassResolver(), beanClass, classDef)); DescriptorGrouper grouper = DescriptorGrouper.createDescriptorGrouper( - descriptors, true, fury.compressInt(), fury.compressLong()); + fury.getClassResolver()::isMonomorphic, + descriptors, + false, + fury.compressInt(), + fury.compressLong()); objectCodecOptimizer = new ObjectCodecOptimizer(beanClass, grouper, !fury.isBasicTypesRefIgnored(), ctx); } diff --git a/java/fury-core/src/main/java/org/apache/fury/builder/ObjectCodecBuilder.java b/java/fury-core/src/main/java/org/apache/fury/builder/ObjectCodecBuilder.java index 8ce55855f9..dede004386 100644 --- a/java/fury-core/src/main/java/org/apache/fury/builder/ObjectCodecBuilder.java +++ b/java/fury-core/src/main/java/org/apache/fury/builder/ObjectCodecBuilder.java @@ -56,6 +56,7 @@ import org.apache.fury.codegen.Expression.StaticInvoke; import org.apache.fury.codegen.ExpressionVisitor; import org.apache.fury.memory.Platform; +import org.apache.fury.meta.ClassDef; import org.apache.fury.reflect.TypeRef; import org.apache.fury.serializer.ObjectSerializer; import org.apache.fury.serializer.PrimitiveSerializers.LongSerializer; @@ -87,13 +88,23 @@ public class ObjectCodecBuilder extends BaseObjectCodecBuilder { public ObjectCodecBuilder(Class beanClass, Fury fury) { super(TypeRef.of(beanClass), fury, Generated.GeneratedObjectSerializer.class); - Collection descriptors = - classResolver.getAllDescriptorsMap(beanClass, true).values(); + Collection 
descriptors; + boolean shareMeta = fury.getConfig().shareMetaContext(); + if (shareMeta) { + ClassDef classDef = classResolver.getClassDef(beanClass, true); + descriptors = classDef.getDescriptors(classResolver, beanClass); + } else { + descriptors = fury.getClassResolver().getAllDescriptorsMap(beanClass, true).values(); + } classVersionHash = new Literal(ObjectSerializer.computeVersionHash(descriptors), PRIMITIVE_INT_TYPE); DescriptorGrouper grouper = DescriptorGrouper.createDescriptorGrouper( - descriptors, false, fury.compressInt(), fury.compressLong()); + fury.getClassResolver()::isMonomorphic, + descriptors, + false, + fury.compressInt(), + fury.compressLong()); objectCodecOptimizer = new ObjectCodecOptimizer(beanClass, grouper, !fury.isBasicTypesRefIgnored(), ctx); if (isRecord) { diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/ClassDef.java b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDef.java index 319a0972e5..53e73844a5 100644 --- a/java/fury-core/src/main/java/org/apache/fury/meta/ClassDef.java +++ b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDef.java @@ -19,6 +19,7 @@ package org.apache.fury.meta; +import static org.apache.fury.meta.ClassDefEncoder.buildFields; import static org.apache.fury.type.TypeUtils.COLLECTION_TYPE; import static org.apache.fury.type.TypeUtils.MAP_TYPE; import static org.apache.fury.type.TypeUtils.collectionOf; @@ -27,7 +28,6 @@ import java.io.ObjectStreamClass; import java.io.Serializable; import java.lang.reflect.Field; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -35,26 +35,22 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.function.Function; +import java.util.SortedMap; import org.apache.fury.Fury; import org.apache.fury.builder.MetaSharedCodecBuilder; -import org.apache.fury.collection.IdentityObjectIntMap; import org.apache.fury.config.CompatibleMode; import 
org.apache.fury.config.FuryBuilder; import org.apache.fury.logging.Logger; import org.apache.fury.logging.LoggerFactory; import org.apache.fury.memory.MemoryBuffer; -import org.apache.fury.memory.MemoryUtils; import org.apache.fury.memory.Platform; import org.apache.fury.reflect.ReflectionUtils; import org.apache.fury.reflect.TypeRef; import org.apache.fury.resolver.ClassResolver; import org.apache.fury.serializer.CompatibleSerializer; import org.apache.fury.type.Descriptor; -import org.apache.fury.type.DescriptorGrouper; import org.apache.fury.type.FinalObjectTypeStub; import org.apache.fury.type.GenericType; -import org.apache.fury.util.MurmurHash3; import org.apache.fury.util.Preconditions; /** @@ -74,10 +70,12 @@ * @see FuryBuilder#withMetaContextShare * @see ReflectionUtils#getFieldOffset */ -@SuppressWarnings("UnstableApiUsage") public class ClassDef implements Serializable { private static final Logger LOG = LoggerFactory.getLogger(ClassDef.class); + static final int SCHEMA_COMPATIBLE_FLAG = 0b10000; + public static final int SIZE_TWO_BYTES_FLAG = 0b100000; + static final int EXT_FLAG = 0b1000000; // TODO use field offset to sort field, which will hit l1-cache more. Since // `objectFieldOffset` is not part of jvm-specification, it may change between different jdk // vendor. But the deserialization peer use the class definition to create deserializer, it's OK @@ -107,15 +105,21 @@ public class ClassDef implements Serializable { private final Map extMeta; // Unique id for class def. If class def are same between processes, then the id will // be same too. - private long id; - - // cache for serialization. 
- private transient byte[] serialized; - - private ClassDef(String className, List fieldsInfo, Map extMeta) { + private final long id; + private final byte[] encoded; + private transient List descriptors; + + ClassDef( + String className, + List fieldsInfo, + Map extMeta, + long id, + byte[] encoded) { this.className = className; this.fieldsInfo = fieldsInfo; this.extMeta = extMeta; + this.id = id; + this.encoded = encoded; } /** @@ -145,6 +149,10 @@ public long getId() { return id; } + public byte[] getEncoded() { + return encoded; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -164,137 +172,63 @@ public int hashCode() { return Objects.hash(className, fieldsInfo, extMeta); } + @Override + public String toString() { + return "ClassDef{" + + "className='" + + className + + '\'' + + ", fieldsInfo=" + + fieldsInfo + + ", extMeta=" + + extMeta + + ", id=" + + id + + '}'; + } + /** Write class definition to buffer. */ public void writeClassDef(MemoryBuffer buffer) { - byte[] serialized = this.serialized; - if (serialized == null) { - MemoryBuffer buf = MemoryUtils.buffer(32); - IdentityObjectIntMap map = new IdentityObjectIntMap<>(8, 0.5f); - writeSharedString(buf, map, className); - buf.writeVarUint32Small7(fieldsInfo.size()); - for (FieldInfo fieldInfo : fieldsInfo) { - writeSharedString(buf, map, fieldInfo.definedClass); - byte[] bytes = fieldInfo.fieldName.getBytes(StandardCharsets.UTF_8); - buf.writePrimitiveArrayWithSize(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length); - fieldInfo.fieldType.write(buf); - } - buf.writeVarUint32Small7(extMeta.size()); - extMeta.forEach( - (k, v) -> { - byte[] keyBytes = k.getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = v.getBytes(StandardCharsets.UTF_8); - buf.writePrimitiveArrayWithSize(keyBytes, Platform.BYTE_ARRAY_OFFSET, keyBytes.length); - buf.writePrimitiveArrayWithSize( - valueBytes, Platform.BYTE_ARRAY_OFFSET, valueBytes.length); - }); - serialized = this.serialized = buf.getBytes(0, 
buf.writerIndex()); - id = MurmurHash3.murmurhash3_x64_128(serialized, 0, serialized.length, 47)[0]; - // this id will be part of generated codec, a negative number won't be allowed in class name. - id = Math.abs(id); - } - buffer.writeBytes(serialized); - buffer.writeInt64(id); + buffer.writeBytes(encoded); } - private static void writeSharedString( - MemoryBuffer buffer, IdentityObjectIntMap map, String str) { - int newId = map.size; - int id = map.putOrGet(str, newId); - if (id >= 0) { - // TODO use flagged varint. - buffer.writeBoolean(true); - buffer.writeVarUint32Small7(id); - } else { - buffer.writeBoolean(false); - byte[] bytes = str.getBytes(StandardCharsets.UTF_8); - buffer.writePrimitiveArrayWithSize(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length); - } + /** Read class definition from buffer. */ + public static ClassDef readClassDef(ClassResolver classResolver, MemoryBuffer buffer) { + return ClassDefDecoder.decodeClassDef(classResolver, buffer, buffer.readInt64()); } /** Read class definition from buffer. 
*/ - public static ClassDef readClassDef(MemoryBuffer buffer) { - List strings = new ArrayList<>(); - String className = readSharedString(buffer, strings); - List fieldInfos = new ArrayList<>(); - int numFields = buffer.readVarUint32Small7(); - for (int i = 0; i < numFields; i++) { - String definedClass = readSharedString(buffer, strings); - String fieldName = new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8); - fieldInfos.add(new FieldInfo(definedClass, fieldName, FieldType.read(buffer))); - } - int extMetaSize = buffer.readVarUint32Small7(); - Map extMeta = new HashMap<>(); - for (int i = 0; i < extMetaSize; i++) { - extMeta.put( - new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8), - new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8)); - } - long id = buffer.readInt64(); - ClassDef classDef = new ClassDef(className, fieldInfos, extMeta); - classDef.id = id; - return classDef; + public static ClassDef readClassDef( + ClassResolver classResolver, MemoryBuffer buffer, long header) { + return ClassDefDecoder.decodeClassDef(classResolver, buffer, header); } - private static String readSharedString(MemoryBuffer buffer, List strings) { - String str; - if (buffer.readBoolean()) { - return strings.get(buffer.readVarUint32Small7()); - } else { - str = new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8); - strings.add(str); - return str; + public List getDescriptors(ClassResolver resolver, Class cls) { + if (descriptors == null) { + SortedMap allDescriptorsMap = resolver.getAllDescriptorsMap(cls, true); + Map descriptorsMap = new HashMap<>(); + for (Map.Entry e : allDescriptorsMap.entrySet()) { + if (descriptorsMap.put( + e.getKey().getDeclaringClass().getName() + "." 
+ e.getKey().getName(), e.getValue()) + != null) { + throw new IllegalStateException("Duplicate key"); + } + } + descriptors = new ArrayList<>(fieldsInfo.size()); + for (ClassDef.FieldInfo fieldInfo : fieldsInfo) { + Descriptor descriptor = + descriptorsMap.get(fieldInfo.getDefinedClass() + "." + fieldInfo.getFieldName()); + Descriptor newDesc = fieldInfo.toDescriptor(resolver); + if (descriptor != null) { + // Make DescriptorGrouper have consistent order whether field exist or not + descriptor = descriptor.copyWithTypeName(newDesc.getTypeName()); + descriptors.add(descriptor); + } else { + descriptors.add(newDesc); + } + } } - } - - public static ClassDef buildClassDef(Class cls, Fury fury) { - Comparator comparator = - DescriptorGrouper.getPrimitiveComparator(fury.compressInt(), fury.compressLong()); - DescriptorGrouper descriptorGrouper = - new DescriptorGrouper( - fury.getClassResolver().getAllDescriptorsMap(cls, true).values(), - false, - Function.identity(), - comparator, - DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME); - ClassResolver classResolver = fury.getClassResolver(); - List fields = new ArrayList<>(); - descriptorGrouper - .getPrimitiveDescriptors() - .forEach(descriptor -> fields.add(descriptor.getField())); - descriptorGrouper - .getBoxedDescriptors() - .forEach(descriptor -> fields.add(descriptor.getField())); - descriptorGrouper - .getFinalDescriptors() - .forEach(descriptor -> fields.add(descriptor.getField())); - descriptorGrouper - .getOtherDescriptors() - .forEach(descriptor -> fields.add(descriptor.getField())); - descriptorGrouper - .getCollectionDescriptors() - .forEach(descriptor -> fields.add(descriptor.getField())); - descriptorGrouper.getMapDescriptors().forEach(descriptor -> fields.add(descriptor.getField())); - return buildClassDef(classResolver, cls, fields); - } - - /** Build class definition from fields of class. 
*/ - public static ClassDef buildClassDef( - ClassResolver classResolver, Class type, List fields) { - return buildClassDef(classResolver, type, fields, new HashMap<>()); - } - - public static ClassDef buildClassDef( - ClassResolver classResolver, Class type, List fields, Map extMeta) { - List fieldInfos = new ArrayList<>(); - for (Field field : fields) { - FieldInfo fieldInfo = - new FieldInfo( - field.getDeclaringClass().getName(), - field.getName(), - buildFieldType(classResolver, field)); - fieldInfos.add(fieldInfo); - } - return new ClassDef(type.getName(), fieldInfos, extMeta); + return descriptors; } /** @@ -310,7 +244,7 @@ public static class FieldInfo implements Serializable { private final FieldType fieldType; - private FieldInfo(String definedClass, String fieldName, FieldType fieldType) { + FieldInfo(String definedClass, String fieldName, FieldType fieldType) { this.definedClass = definedClass; this.fieldName = fieldName; this.fieldType = fieldType; @@ -326,6 +260,14 @@ public String getFieldName() { return fieldName; } + public boolean hasTypeTag() { + return false; + } + + public short getTypeTag() { + return -1; + } + /** Returns type of current field. */ public FieldType getFieldType() { return fieldType; @@ -336,7 +278,7 @@ public FieldType getFieldType() { * null. Don't invoke this method if class does have fieldName field. In such case, * reflection should be used to get the descriptor. */ - public Descriptor toDescriptor(ClassResolver classResolver) { + Descriptor toDescriptor(ClassResolver classResolver) { TypeRef typeRef = fieldType.toTypeToken(classResolver); // This field doesn't exist in peer class, so any legal modifier will be OK. 
int stubModifiers = ReflectionUtils.getField(getClass(), "fieldName").getModifiers(); @@ -364,7 +306,16 @@ public int hashCode() { @Override public String toString() { - return "FieldInfo{" + "fieldName='" + fieldName + '\'' + ", fieldType=" + fieldType + '}'; + return "FieldInfo{" + + "definedClass='" + + definedClass + + '\'' + + ", fieldName='" + + fieldName + + '\'' + + ", fieldType=" + + fieldType + + '}'; } } @@ -406,38 +357,39 @@ public int hashCode() { } public void write(MemoryBuffer buffer) { - buffer.writeBoolean(isMonomorphic); + byte header = (byte) (isMonomorphic ? 1 : 0); if (this instanceof RegisteredFieldType) { - buffer.writeByte(0); - buffer.writeInt16(((RegisteredFieldType) this).getClassId()); + short classId = ((RegisteredFieldType) this).getClassId(); + buffer.writeVarUint32Small7(((3 + classId) << 1) | header); } else if (this instanceof CollectionFieldType) { - buffer.writeByte(1); + buffer.writeVarUint32Small7((2 << 1) | header); ((CollectionFieldType) this).elementType.write(buffer); } else if (this instanceof MapFieldType) { - buffer.writeByte(2); + buffer.writeVarUint32Small7((1 << 1) | header); MapFieldType mapFieldType = (MapFieldType) this; mapFieldType.keyType.write(buffer); mapFieldType.valueType.write(buffer); } else { Preconditions.checkArgument(this instanceof ObjectFieldType); - buffer.writeByte(3); + buffer.writeVarUint32Small7(header); } } public static FieldType read(MemoryBuffer buffer) { - boolean isFinal = buffer.readBoolean(); - byte typecode = buffer.readByte(); - switch (typecode) { - case 0: - return new RegisteredFieldType(isFinal, buffer.readInt16()); - case 1: - return new CollectionFieldType(isFinal, read(buffer)); - case 2: - return new MapFieldType(isFinal, read(buffer), read(buffer)); - case 3: - return new ObjectFieldType(isFinal); - default: - throw new IllegalStateException(String.format("Unsupported type code %s", typecode)); + int header = buffer.readVarUint32Small7(); + boolean isMonomorphic = (header & 
0b1) != 0; + return read(buffer, isMonomorphic, header >>> 1); + } + + public static FieldType read(MemoryBuffer buffer, boolean isFinal, int typeId) { + if (typeId == 0) { + return new ObjectFieldType(isFinal); + } else if (typeId == 1) { + return new MapFieldType(isFinal, read(buffer), read(buffer)); + } else if (typeId == 2) { + return new CollectionFieldType(isFinal, read(buffer)); + } else { + return new RegisteredFieldType(isFinal, (short) (typeId - 3)); } } } @@ -638,7 +590,7 @@ public int hashCode() { static FieldType buildFieldType(ClassResolver classResolver, Field field) { Preconditions.checkNotNull(field); Class rawType = field.getType(); - boolean isFinal = GenericType.isFinalByDefault(rawType); + boolean isFinal = classResolver.isMonomorphic(rawType); if (Collection.class.isAssignableFrom(rawType)) { GenericType genericType = GenericType.build(field.getGenericType()); return new CollectionFieldType( @@ -706,4 +658,24 @@ private static FieldType buildFieldType(ClassResolver classResolver, GenericType } } } + + public static ClassDef buildClassDef(Fury fury, Class cls) { + return buildClassDef(fury, cls, true); + } + + public static ClassDef buildClassDef(Fury fury, Class cls, boolean resolveParent) { + return ClassDefEncoder.buildClassDef( + fury.getClassResolver(), cls, buildFields(fury, cls, resolveParent), new HashMap<>()); + } + + /** Build class definition from fields of class. 
*/ + public static ClassDef buildClassDef( + ClassResolver classResolver, Class type, List fields) { + return buildClassDef(classResolver, type, fields, new HashMap<>()); + } + + public static ClassDef buildClassDef( + ClassResolver classResolver, Class type, List fields, Map extMeta) { + return ClassDefEncoder.buildClassDef(classResolver, type, fields, extMeta); + } } diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefDecoder.java b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefDecoder.java new file mode 100644 index 0000000000..48d2491294 --- /dev/null +++ b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefDecoder.java @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.fury.meta;
+
+import static org.apache.fury.meta.ClassDef.SIZE_TWO_BYTES_FLAG;
+import static org.apache.fury.meta.Encoders.fieldNameEncodings;
+import static org.apache.fury.meta.Encoders.pkgEncodings;
+import static org.apache.fury.meta.Encoders.typeNameEncodings;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.fury.memory.MemoryBuffer;
+import org.apache.fury.meta.ClassDef.FieldType;
+import org.apache.fury.meta.MetaString.Encoding;
+import org.apache.fury.reflect.ReflectionUtils;
+import org.apache.fury.resolver.ClassResolver;
+import org.apache.fury.util.Preconditions;
+
+/**
+ * A decoder which decodes binary into {@link ClassDef}. See spec documentation:
+ * docs/specification/java_serialization_spec.md
+ */
+class ClassDefDecoder {
+  /**
+   * Decodes one class definition from {@code buffer}.
+   *
+   * <p>The caller has already consumed the 8-byte header and passes it as {@code id}. This method
+   * reads the 1- or 2-byte body size, then every class layer, then the optional ext meta.
+   *
+   * @param classResolver used to map a registered class id back to its class name
+   * @param buffer buffer positioned right after the 8-byte header
+   * @param id the 8-byte header; its low byte carries the layer count and flag bits
+   * @return the decoded definition, carrying a copy of its encoded bytes (header, size, body)
+   */
+  public static ClassDef decodeClassDef(ClassResolver classResolver, MemoryBuffer buffer, long id) {
+    boolean sizeTwoBytes = (id & SIZE_TWO_BYTES_FLAG) != 0;
+    // Rebuild the encoded form (header + size + body) so it can be stored in the ClassDef.
+    MemoryBuffer encoded = MemoryBuffer.newHeapBuffer(32);
+    encoded.writeInt64(id);
+    int size;
+    if (sizeTwoBytes) {
+      size = buffer.readInt16() & 0xffff;
+      encoded.writeInt16((short) size);
+    } else {
+      size = buffer.readByte() & 0xff;
+      encoded.writeByte(size);
+    }
+    buffer.checkReadableBytes(size);
+    // Copy the body without moving the reader index; the body is parsed from `buffer` below.
+    encoded.writeBytes(buffer.getBytes(buffer.readerIndex(), size));
+    long header = id & 0xff;
+    // Low 4 bits hold `number of layers - 1`; the value 0b1111 means a varint with the
+    // remainder follows at the start of the body.
+    int numClasses = (int) (header & 0b1111);
+    if (numClasses == 0b1111) {
+      numClasses += buffer.readVarUint32Small7();
+    }
+    numClasses += 1;
+    String className = null;
+    List<ClassDef.FieldInfo> classFields = new ArrayList<>();
+    for (int i = 0; i < numClasses; i++) {
+      // | num fields + register flag | header + package name | header + class name
+      // | header + type id + field name | next field info | ... |
+      int currentClassHeader = buffer.readVarUint32Small7();
+      boolean isRegistered = (currentClassHeader & 0b1) != 0;
+      int numFields = currentClassHeader >>> 1;
+      String fullClassName;
+      if (isRegistered) {
+        int registeredId = buffer.readVarUint32Small7();
+        fullClassName = classResolver.getClassInfo((short) registeredId).getCls().getName();
+      } else {
+        String pkg = readPkgName(buffer);
+        String typeName = readTypeName(buffer);
+        fullClassName = ReflectionUtils.getFullClassName(pkg, typeName);
+      }
+      // The name of the last layer read becomes the name of the decoded class.
+      className = fullClassName;
+      List<ClassDef.FieldInfo> fieldInfos = readFieldsInfo(buffer, fullClassName, numFields);
+      classFields.addAll(fieldInfos);
+    }
+    boolean hasExtMeta = (header & 0b1000000) != 0;
+    Map<String, String> extMeta = new HashMap<>();
+    if (hasExtMeta) {
+      int extMetaSize = buffer.readVarUint32Small7();
+      for (int i = 0; i < extMetaSize; i++) {
+        // Key is read before value: Java evaluates the arguments left to right.
+        extMeta.put(
+            new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8),
+            new String(buffer.readBytesAndSize(), StandardCharsets.UTF_8));
+      }
+    }
+    return new ClassDef(
+        className, classFields, extMeta, id, encoded.getBytes(0, encoded.writerIndex()));
+  }
+
+  /**
+   * Reads {@code numFields} field entries which all belong to {@code className}.
+   *
+   * <p>Field header byte as parsed here: bits 5-7 hold the name size, bits 3-4 the field name
+   * encoding, bit 2 the monomorphic flag.
+   */
+  private static List<ClassDef.FieldInfo> readFieldsInfo(
+      MemoryBuffer buffer, String className, int numFields) {
+    List<ClassDef.FieldInfo> fieldInfos = new ArrayList<>(numFields);
+    for (int i = 0; i < numFields; i++) {
+      int header = buffer.readByte() & 0xff;
+      // `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref
+      // tracking flag`
+      // TODO(chaokunyang) read type tag
+      int encodingFlags = (header >>> 3) & 0b11;
+      boolean useTagID = encodingFlags == 3;
+      Preconditions.checkArgument(
+          !useTagID, "Type tag not supported currently, parsed fieldInfos %s", fieldInfos);
+      // The size bits store `length - 1`; the value 7 means a varint with the remainder follows.
+      int size = header >>> 5;
+      if (size == 7) {
+        size += buffer.readVarUint32Small7();
+      }
+      size += 1;
+      Encoding encoding = fieldNameEncodings[encodingFlags];
+      String fieldName = Encoders.FIELD_NAME_DECODER.decode(buffer.readBytes(size), encoding);
+      boolean isMonomorphic = (header & 0b100) != 0;
+      int typeId = buffer.readVarUint32Small14();
+      FieldType fieldType = FieldType.read(buffer, isMonomorphic, typeId);
+      fieldInfos.add(new ClassDef.FieldInfo(className, fieldName, fieldType));
+    }
+    return fieldInfos;
+  }
+
+  /** Reads a package name; the entry is only present when the class is not registered. */
+  private static String readPkgName(MemoryBuffer buffer) {
+    // - Package name encoding(omitted when class is registered):
+    //    - encoding algorithm: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL`
+    //    - Header: `6 bits size | 2 bits encoding flags`.
+    //      As implemented by `readName`: a size field below 62 is the byte length itself;
+    //      the value 62 means the length is 62 plus the unsigned varint that follows.
+    int header = buffer.readByte() & 0xff;
+    int encodingFlags = header & 0b11;
+    Encoding encoding = pkgEncodings[encodingFlags];
+    return readName(Encoders.PACKAGE_DECODER, buffer, header, encoding, 62);
+  }
+
+  /** Reads a simple type name; the entry is only present when the class is not registered. */
+  private static String readTypeName(MemoryBuffer buffer) {
+    // - Class name encoding(omitted when class is registered):
+    //     - encoding algorithm:
+    // `UTF8/LOWER_UPPER_DIGIT_SPECIAL/FIRST_TO_LOWER_SPECIAL/ALL_TO_LOWER_SPECIAL`
+    //     - header: `6 bits size | 2 bits encoding flags`.
+    //      As implemented by `readName`: a size field below 63 is the byte length itself;
+    //      the value 63 means the length is 63 plus the unsigned varint that follows.
+    int header = buffer.readByte() & 0xff;
+    int encodingFlags = header & 0b11;
+    Encoding encoding = typeNameEncodings[encodingFlags];
+    return readName(Encoders.TYPE_NAME_DECODER, buffer, header, encoding, 63);
+  }
+
+  /**
+   * Reads the encoded bytes of a name and decodes them.
+   *
+   * @param header the already-read header byte; its upper six bits are the size field
+   * @param max size-field value which signals that a varint with {@code length - max} follows
+   */
+  private static String readName(
+      MetaStringDecoder decoder, MemoryBuffer buffer, int header, Encoding encoding, int max) {
+    int size = header >> 2;
+    if (size == max) {
+      size = buffer.readVarUint32Small7() + max;
+    }
+    return decoder.decode(buffer.readBytes(size), encoding);
+  }
+}
diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefEncoder.java b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefEncoder.java
new file mode 100644
index 0000000000..f154aa5a4f
--- /dev/null
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/ClassDefEncoder.java
@@ -0,0 +1,303 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.meta;
+
+import static org.apache.fury.meta.ClassDef.EXT_FLAG;
+import static org.apache.fury.meta.ClassDef.SCHEMA_COMPATIBLE_FLAG;
+import static org.apache.fury.meta.ClassDef.SIZE_TWO_BYTES_FLAG;
+import static org.apache.fury.meta.Encoders.fieldNameEncodingsList;
+import static org.apache.fury.meta.Encoders.pkgEncodingsList;
+import static org.apache.fury.meta.Encoders.typeNameEncodingsList;
+
+import java.lang.reflect.Field;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import org.apache.fury.Fury;
+import org.apache.fury.memory.MemoryBuffer;
+import org.apache.fury.memory.MemoryUtils;
+import org.apache.fury.memory.Platform;
+import org.apache.fury.meta.ClassDef.FieldInfo;
+import org.apache.fury.reflect.ReflectionUtils;
+import org.apache.fury.resolver.ClassResolver;
+import org.apache.fury.type.Descriptor;
+import org.apache.fury.type.DescriptorGrouper;
+import org.apache.fury.util.MurmurHash3;
+import org.apache.fury.util.Preconditions;
+
+/**
+ * An encoder which encodes {@link ClassDef} into binary. See spec documentation:
+ * docs/specification/java_serialization_spec.md
+ */
+class ClassDefEncoder {
+  /**
+   * Collects the fields of {@code cls} in the order the descriptor grouper yields them:
+   * primitive, boxed, final, other, collection, then map descriptors.
+   *
+   * @param resolveParent forwarded to {@code ClassResolver.getAllDescriptorsMap}
+   */
+  static List<Field> buildFields(Fury fury, Class<?> cls, boolean resolveParent) {
+    Comparator<Descriptor> comparator =
+        DescriptorGrouper.getPrimitiveComparator(fury.compressInt(), fury.compressLong());
+    DescriptorGrouper descriptorGrouper =
+        new DescriptorGrouper(
+            fury.getClassResolver()::isMonomorphic,
+            fury.getClassResolver().getAllDescriptorsMap(cls, resolveParent).values(),
+            false,
+            Function.identity(),
+            comparator,
+            DescriptorGrouper.COMPARATOR_BY_TYPE_AND_NAME);
+    List<Field> fields = new ArrayList<>();
+    descriptorGrouper
+        .getPrimitiveDescriptors()
+        .forEach(descriptor -> fields.add(descriptor.getField()));
+    descriptorGrouper
+        .getBoxedDescriptors()
+        .forEach(descriptor -> fields.add(descriptor.getField()));
+    descriptorGrouper
+        .getFinalDescriptors()
+        .forEach(descriptor -> fields.add(descriptor.getField()));
+    descriptorGrouper
+        .getOtherDescriptors()
+        .forEach(descriptor -> fields.add(descriptor.getField()));
+    descriptorGrouper
+        .getCollectionDescriptors()
+        .forEach(descriptor -> fields.add(descriptor.getField()));
+    descriptorGrouper.getMapDescriptors().forEach(descriptor -> fields.add(descriptor.getField()));
+    return fields;
+  }
+
+  /** Builds field infos for all fields of {@code cls}, parent classes included. */
+  static List<FieldInfo> buildFieldsInfo(ClassResolver resolver, Class<?> cls) {
+    return buildFieldsInfo(resolver, buildFields(resolver.getFury(), cls, true));
+  }
+
+  /** Converts each field into a {@link FieldInfo} keyed by its declaring class and name. */
+  static List<FieldInfo> buildFieldsInfo(ClassResolver resolver, List<Field> fields) {
+    List<FieldInfo> fieldInfos = new ArrayList<>();
+    for (Field field : fields) {
+      FieldInfo fieldInfo =
+          new FieldInfo(
+              field.getDeclaringClass().getName(),
+              field.getName(),
+              ClassDef.buildFieldType(resolver, field));
+      fieldInfos.add(fieldInfo);
+    }
+    return fieldInfos;
+  }
+
+  /** Build class definition from fields of class. */
+  static ClassDef buildClassDef(
+      ClassResolver classResolver,
+      Class<?> type,
+      List<Field> fields,
+      Map<String, String> extMeta) {
+    List<FieldInfo> fieldInfos = buildFieldsInfo(classResolver, fields);
+    Map<String, List<FieldInfo>> classLayers = getClassFields(type, fieldInfos);
+    // Re-collect the field infos in layer order so they match the encoded order.
+    fieldInfos = new ArrayList<>(fieldInfos.size());
+    classLayers.values().forEach(fieldInfos::addAll);
+    MemoryBuffer encodeClassDef = encodeClassDef(classResolver, type, classLayers, extMeta);
+    byte[] classDefBytes = encodeClassDef.getBytes(0, encodeClassDef.writerIndex());
+    return new ClassDef(
+        type.getName(), fieldInfos, extMeta, encodeClassDef.getInt64(0), classDefBytes);
+  }
+
+  /**
+   * Encodes the class layers and ext meta into
+   * {@code | 8 bytes header | 1~2 bytes size | class layers | ext meta |}.
+   *
+   * @param type the leaf class being described
+   * @param classLayers fields grouped by declaring class name, in the order they are written
+   * @param extMeta extra key/value pairs; written only when not empty
+   * @return buffer whose bytes {@code [0, writerIndex)} hold the encoded definition
+   */
+  // see spec documentation: docs/specification/java_serialization_spec.md
+  // https://fury.apache.org/docs/specification/fury_java_serialization_spec
+  static MemoryBuffer encodeClassDef(
+      ClassResolver classResolver,
+      Class<?> type,
+      Map<String, List<FieldInfo>> classLayers,
+      Map<String, String> extMeta) {
+    MemoryBuffer buffer = MemoryUtils.buffer(32);
+    buffer.increaseWriterIndex(9); // header + one byte size
+    long header;
+    int encodedSize = classLayers.size() - 1; // num class must be greater than 0
+    if (encodedSize > 0b1110) {
+      header = 0b1111;
+      // ClassDefDecoder computes `0b1111 + varint + 1`, so the varint carries what exceeds
+      // 0b1111; subtracting 0b1110 here made the decoder read one layer too many.
+      buffer.writeVarUint32Small7(encodedSize - 0b1111);
+    } else {
+      header = encodedSize;
+    }
+    header |= SCHEMA_COMPATIBLE_FLAG;
+    if (!extMeta.isEmpty()) {
+      header |= EXT_FLAG;
+    }
+    // Each layer must be checked for registration on its own class, not on the leaf `type`.
+    Map<String, Class<?>> layerTypes = new HashMap<>();
+    for (Class<?> clz : ReflectionUtils.getAllClasses(type, true)) {
+      layerTypes.put(clz.getName(), clz);
+    }
+    for (Map.Entry<String, List<FieldInfo>> entry : classLayers.entrySet()) {
+      String className = entry.getKey();
+      List<FieldInfo> fields = entry.getValue();
+      // Fall back to `type` when the layer name is not part of the hierarchy of `type`.
+      Class<?> layerType = layerTypes.getOrDefault(className, type);
+      // | num fields + register flag | header + package name | header + class name
+      // | header + type id + field name | next field info | ... |
+      int currentClassHeader = (fields.size() << 1);
+      if (classResolver.isRegistered(layerType)) {
+        currentClassHeader |= 1;
+        buffer.writeVarUint32Small7(currentClassHeader);
+        buffer.writeVarUint32Small7(classResolver.getRegisteredClassId(layerType));
+      } else {
+        buffer.writeVarUint32Small7(currentClassHeader);
+        String pkg = ReflectionUtils.getPackage(className);
+        String typeName = ReflectionUtils.getSimpleClassName(className);
+        writePkgName(buffer, pkg);
+        writeTypeName(buffer, typeName);
+      }
+      writeFieldsInfo(buffer, fields);
+    }
+    if (!extMeta.isEmpty()) {
+      buffer.writeVarUint32Small7(extMeta.size());
+      for (Map.Entry<String, String> entry : extMeta.entrySet()) {
+        String k = entry.getKey();
+        String v = entry.getValue();
+        byte[] keyBytes = k.getBytes(StandardCharsets.UTF_8);
+        byte[] valueBytes = v.getBytes(StandardCharsets.UTF_8);
+        buffer.writePrimitiveArrayWithSize(keyBytes, Platform.BYTE_ARRAY_OFFSET, keyBytes.length);
+        buffer.writePrimitiveArrayWithSize(
+            valueBytes, Platform.BYTE_ARRAY_OFFSET, valueBytes.length);
+      }
+    }
+    byte[] encodedClassDef = buffer.getBytes(0, buffer.writerIndex());
+    long hash = MurmurHash3.murmurhash3_x64_128(encodedClassDef, 0, encodedClassDef.length, 47)[0];
+    // this id will be part of generated codec, a negative number won't be allowed in class name.
+    hash <<= 8;
+    // Math.abs(Long.MIN_VALUE) is still negative, so that single value is mapped to 0.
+    header |= hash == Long.MIN_VALUE ? 0 : Math.abs(hash);
+    int len = buffer.writerIndex() - 9;
+    if (len > 255) {
+      header |= SIZE_TWO_BYTES_FLAG;
+    }
+    buffer.putInt64(0, header);
+    if (len > 255) {
+      // Re-layout with a two byte size: 8 bytes header + 2 bytes size + body.
+      MemoryBuffer buf = MemoryBuffer.newHeapBuffer(len + 10);
+      buf.writeInt64(header);
+      buf.writeInt16((short) len);
+      buf.writeBytes(buffer.getBytes(9, len));
+      buffer = buf;
+    } else {
+      buffer.putByte(8, (byte) len);
+    }
+    return buffer;
+  }
+
+  /**
+   * Groups {@code fieldsInfo} by declaring class, ordered from the top-most class to {@code type}.
+   *
+   * <p>The leaf class is always the last entry, even when it declares no fields, because
+   * {@link ClassDefDecoder} takes the name of the last layer as the name of the decoded class.
+   */
+  static Map<String, List<FieldInfo>> getClassFields(Class<?> type, List<FieldInfo> fieldsInfo) {
+    Map<String, List<FieldInfo>> sortedClassFields = new LinkedHashMap<>();
+    if (fieldsInfo.isEmpty()) {
+      sortedClassFields.put(type.getName(), new ArrayList<>());
+      return sortedClassFields;
+    }
+    Map<String, List<FieldInfo>> classFields = groupClassFields(fieldsInfo);
+    for (Class<?> clz : ReflectionUtils.getAllClasses(type, true)) {
+      List<FieldInfo> fieldInfos = classFields.get(clz.getName());
+      if (fieldInfos != null) {
+        sortedClassFields.put(clz.getName(), fieldInfos);
+      }
+    }
+    // `type` is visited last above, so appending a missing leaf layer keeps top-to-leaf order.
+    sortedClassFields.computeIfAbsent(type.getName(), k -> new ArrayList<>());
+    return sortedClassFields;
+  }
+
+  /** Groups field infos by the name of the class which defines them. */
+  static Map<String, List<FieldInfo>> groupClassFields(List<FieldInfo> fieldsInfo) {
+    Map<String, List<FieldInfo>> classFields = new HashMap<>();
+    for (FieldInfo fieldInfo : fieldsInfo) {
+      String definedClass = fieldInfo.getDefinedClass();
+      classFields.computeIfAbsent(definedClass, k -> new ArrayList<>()).add(fieldInfo);
+    }
+    return classFields;
+  }
+
+  /** Writes the header, name and type id of every field of one class layer. */
+  private static void writeFieldsInfo(MemoryBuffer buffer, List<FieldInfo> fields) {
+    for (FieldInfo fieldInfo : fields) {
+      ClassDef.FieldType fieldType = fieldInfo.getFieldType();
+      // `3 bits size + 2 bits field name encoding + polymorphism flag + nullability flag + ref
+      // tracking flag`
+      int header = ((fieldType.isMonomorphic() ? 1 : 0) << 2);
+      // Encoding `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL/TAG_ID`
+      MetaString metaString = Encoders.encodeFieldName(fieldInfo.getFieldName());
+      int encodingFlags = fieldNameEncodingsList.indexOf(metaString.getEncoding());
+      byte[] encoded = metaString.getBytes();
+      // The size bits store `length - 1`.
+      int size = (encoded.length - 1);
+      if (fieldInfo.hasTypeTag()) {
+        size = fieldInfo.getTypeTag();
+        encodingFlags = 3;
+      }
+      header |= (byte) (encodingFlags << 3);
+      boolean bigSize = size >= 7;
+      if (bigSize) {
+        header |= 0b11100000;
+        buffer.writeByte(header);
+        buffer.writeVarUint32Small7(size - 7);
+      } else {
+        header |= (size << 5);
+        buffer.writeByte(header);
+      }
+      if (!fieldInfo.hasTypeTag()) {
+        buffer.writeBytes(encoded);
+      }
+      if (fieldType instanceof ClassDef.RegisteredFieldType) {
+        short classId = ((ClassDef.RegisteredFieldType) fieldType).getClassId();
+        buffer.writeVarUint32Small7(3 + classId);
+      } else if (fieldType instanceof ClassDef.CollectionFieldType) {
+        buffer.writeVarUint32Small7(2);
+        // TODO remove it when new collection deserialization jit finished.
+        ((ClassDef.CollectionFieldType) fieldType).getElementType().write(buffer);
+      } else if (fieldType instanceof ClassDef.MapFieldType) {
+        buffer.writeVarUint32Small7(1);
+        // TODO remove it when new map deserialization jit finished.
+        ClassDef.MapFieldType mapFieldType = (ClassDef.MapFieldType) fieldType;
+        mapFieldType.getKeyType().write(buffer);
+        mapFieldType.getValueType().write(buffer);
+      } else {
+        Preconditions.checkArgument(fieldType instanceof ClassDef.ObjectFieldType);
+        buffer.writeVarUint32Small7(0);
+      }
+    }
+  }
+
+  /** Writes the package name of an unregistered class. */
+  private static void writePkgName(MemoryBuffer buffer, String pkg) {
+    // - Package name encoding(omitted when class is registered):
+    //    - encoding algorithm: `UTF8/ALL_TO_LOWER_SPECIAL/LOWER_UPPER_DIGIT_SPECIAL`
+    //    - Header: `6 bits size | 2 bits encoding flags`.
+    //      A size field below 62 is the byte length itself; the value 62 means the length is
+    //      62 plus the unsigned varint that follows (see `writeName`).
+    MetaString pkgMetaString = Encoders.encodePackage(pkg);
+    byte[] encoded = pkgMetaString.getBytes();
+    int pkgHeader = (encoded.length << 2) | pkgEncodingsList.indexOf(pkgMetaString.getEncoding());
+    writeName(buffer, encoded, pkgHeader, 62);
+  }
+
+  /** Writes the simple type name of an unregistered class. */
+  private static void writeTypeName(MemoryBuffer buffer, String typeName) {
+    // - Class name encoding(omitted when class is registered):
+    //     - encoding algorithm:
+    // `UTF8/LOWER_UPPER_DIGIT_SPECIAL/FIRST_TO_LOWER_SPECIAL/ALL_TO_LOWER_SPECIAL`
+    //     - header: `6 bits size | 2 bits encoding flags`.
+    //      A size field below 63 is the byte length itself; the value 63 means the length is
+    //      63 plus the unsigned varint that follows (see `writeName`).
+    MetaString metaString = Encoders.encodeTypeName(typeName);
+    byte[] encoded = metaString.getBytes();
+    int header = (encoded.length << 2) | typeNameEncodingsList.indexOf(metaString.getEncoding());
+    writeName(buffer, encoded, header, 63);
+  }
+
+  /**
+   * Writes the one byte name header, an optional length extension and the encoded bytes.
+   *
+   * <p>This mirrors {@code ClassDefDecoder.readName}, which reads exactly one header byte and
+   * treats a size field equal to {@code max} as "a varint with {@code length - max} follows".
+   *
+   * @param header {@code (length << 2) | encoding flags}; only the two flag bits are reused when
+   *     the length does not fit into the size field
+   * @param max size-field value reserved as the extension marker
+   */
+  private static void writeName(MemoryBuffer buffer, byte[] encoded, int header, int max) {
+    // A length equal to `max` must take the extended form too, otherwise the decoder would
+    // mistake it for the marker.
+    boolean bigSize = encoded.length >= max;
+    if (bigSize) {
+      // Keep the flags, replace the size bits by the marker, and write a single byte.
+      header = (max << 2) | (header & 0b11);
+      buffer.writeByte(header);
+      buffer.writeVarUint32Small7(encoded.length - max);
+    } else {
+      buffer.writeByte(header);
+    }
+    buffer.writeBytes(encoded);
+  }
+}
diff --git a/java/fury-core/src/main/java/org/apache/fury/meta/Encoders.java b/java/fury-core/src/main/java/org/apache/fury/meta/Encoders.java
new file mode 100644
index 0000000000..a16c8f983e
--- /dev/null
+++ b/java/fury-core/src/main/java/org/apache/fury/meta/Encoders.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.fury.meta;
+
+import static org.apache.fury.meta.MetaString.Encoding.ALL_TO_LOWER_SPECIAL;
+import static org.apache.fury.meta.MetaString.Encoding.FIRST_TO_LOWER_SPECIAL;
+import static org.apache.fury.meta.MetaString.Encoding.LOWER_UPPER_DIGIT_SPECIAL;
+import static org.apache.fury.meta.MetaString.Encoding.UTF_8;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import org.apache.fury.meta.MetaString.Encoding;
+
+/**
+ * Shared meta string encoders/decoders for package, type and field names, together with the
+ * encodings allowed for each kind of name and cached encode helpers.
+ */
+public class Encoders {
+  // Package names use '.' and '_' as the two special characters.
+  public static final MetaStringEncoder PACKAGE_ENCODER = new MetaStringEncoder('.', '_');
+  public static final MetaStringDecoder PACKAGE_DECODER = new MetaStringDecoder('.', '_');
+  // Type and field names use '$' and '_' as the two special characters.
+  public static final MetaStringEncoder TYPE_NAME_ENCODER = new MetaStringEncoder('$', '_');
+  public static final MetaStringDecoder TYPE_NAME_DECODER = new MetaStringDecoder('$', '_');
+  static final MetaStringEncoder FIELD_NAME_ENCODER = new MetaStringEncoder('$', '_');
+  static final MetaStringDecoder FIELD_NAME_DECODER = new MetaStringDecoder('$', '_');
+
+  // The index of an encoding inside each array is the value stored in the 2-bit encoding flags,
+  // so the element order must not change.
+  static final Encoding[] pkgEncodings = {UTF_8, ALL_TO_LOWER_SPECIAL, LOWER_UPPER_DIGIT_SPECIAL};
+  static final List<Encoding> pkgEncodingsList = Arrays.asList(pkgEncodings);
+
+  static final Encoding[] typeNameEncodings = {
+    UTF_8, LOWER_UPPER_DIGIT_SPECIAL, FIRST_TO_LOWER_SPECIAL, ALL_TO_LOWER_SPECIAL
+  };
+  static final List<Encoding> typeNameEncodingsList = Arrays.asList(typeNameEncodings);
+
+  static final Encoding[] fieldNameEncodings = {
+    UTF_8, LOWER_UPPER_DIGIT_SPECIAL, ALL_TO_LOWER_SPECIAL
+  };
+  static final List<Encoding> fieldNameEncodingsList = Arrays.asList(fieldNameEncodings);
+
+  // One cache per kind of name, keyed by the plain string.
+  private static final ConcurrentMap<String, MetaString> packageCache = new ConcurrentHashMap<>();
+  private static final ConcurrentMap<String, MetaString> typeNameCache =
+      new ConcurrentHashMap<>();
+  private static final ConcurrentMap<String, MetaString> fieldNameCache =
+      new ConcurrentHashMap<>();
+
+  /** Returns the cached meta string of {@code pkg}, encoding it on first use. */
+  public static MetaString encodePackage(String pkg) {
+    return packageCache.computeIfAbsent(pkg, name -> PACKAGE_ENCODER.encode(name, pkgEncodings));
+  }
+
+  /** Returns the cached meta string of {@code typeName}, encoding it on first use. */
+  public static MetaString encodeTypeName(String typeName) {
+    return typeNameCache.computeIfAbsent(
+        typeName, name -> TYPE_NAME_ENCODER.encode(name, typeNameEncodings));
+  }
+
+  /** Returns the cached meta string of {@code fieldName}, encoding it on first use. */
+  public static MetaString encodeFieldName(String fieldName) {
+    return fieldNameCache.computeIfAbsent(
+        fieldName, name -> FIELD_NAME_ENCODER.encode(name, fieldNameEncodings));
+  }
+}
+diff
--git a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java index 6c8d69c20f..43b483292f 100644 --- a/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java +++ b/java/fury-core/src/main/java/org/apache/fury/meta/MetaStringEncoder.java @@ -20,16 +20,13 @@ package org.apache.fury.meta; import java.nio.charset.StandardCharsets; +import java.util.HashSet; +import org.apache.fury.collection.Collections; import org.apache.fury.meta.MetaString.Encoding; import org.apache.fury.util.Preconditions; /** Encodes plain text strings into MetaString objects with specified encoding mechanisms. */ public class MetaStringEncoder { - public static final MetaStringEncoder PACKAGE_ENCODER = new MetaStringEncoder('.', '_'); - public static final MetaStringDecoder PACKAGE_DECODER = new MetaStringDecoder('.', '_'); - public static final MetaStringEncoder TYPE_NAME_ENCODER = new MetaStringEncoder('$', '_'); - public static final MetaStringDecoder TYPE_NAME_DECODER = new MetaStringDecoder('$', '_'); - private final char specialChar1; private final char specialChar2; @@ -52,10 +49,14 @@ public MetaStringEncoder(char specialChar1, char specialChar2) { * @return A MetaString object representing the encoded string. 
*/ public MetaString encode(String input) { + return encode(input, Encoding.values()); + } + + public MetaString encode(String input, Encoding[] encodings) { if (input.isEmpty()) { return new MetaString(input, Encoding.UTF_8, specialChar1, specialChar2, new byte[0]); } - Encoding encoding = computeEncoding(input); + Encoding encoding = computeEncoding(input, encodings); return encode(input, encoding); } @@ -95,27 +96,43 @@ public MetaString encode(String input, Encoding encoding) { } public Encoding computeEncoding(String input) { + return computeEncoding(input, Encoding.values()); + } + + public Encoding computeEncoding(String input, Encoding[] encodings) { + HashSet encodingSet = Collections.ofHashSet(encodings); if (input.isEmpty()) { - return Encoding.LOWER_SPECIAL; + if (encodingSet.contains(Encoding.LOWER_SPECIAL)) { + return Encoding.LOWER_SPECIAL; + } } char[] chars = input.toCharArray(); StringStatistics statistics = computeStatistics(chars); if (statistics.canLowerSpecialEncoded) { - return Encoding.LOWER_SPECIAL; - } else if (statistics.canLowerUpperDigitSpecialEncoded) { + if (encodingSet.contains(Encoding.LOWER_SPECIAL)) { + return Encoding.LOWER_SPECIAL; + } + } + if (statistics.canLowerUpperDigitSpecialEncoded) { if (statistics.digitCount != 0) { - return Encoding.LOWER_UPPER_DIGIT_SPECIAL; - } else { - int upperCount = statistics.upperCount; - if (upperCount == 1 && Character.isUpperCase(chars[0])) { + if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) { + return Encoding.LOWER_UPPER_DIGIT_SPECIAL; + } + } + int upperCount = statistics.upperCount; + if (upperCount == 1 && Character.isUpperCase(chars[0])) { + if (encodingSet.contains(Encoding.FIRST_TO_LOWER_SPECIAL)) { return Encoding.FIRST_TO_LOWER_SPECIAL; } - if ((chars.length + upperCount) * 5 < (chars.length * 6)) { + } + if ((chars.length + upperCount) * 5 < (chars.length * 6)) { + if (encodingSet.contains(Encoding.ALL_TO_LOWER_SPECIAL)) { return Encoding.ALL_TO_LOWER_SPECIAL; - } 
else { - return Encoding.LOWER_UPPER_DIGIT_SPECIAL; } } + if (encodingSet.contains(Encoding.LOWER_UPPER_DIGIT_SPECIAL)) { + return Encoding.LOWER_UPPER_DIGIT_SPECIAL; + } } return Encoding.UTF_8; } diff --git a/java/fury-core/src/main/java/org/apache/fury/reflect/ReflectionUtils.java b/java/fury-core/src/main/java/org/apache/fury/reflect/ReflectionUtils.java index aa1ae5cbe2..d837896c84 100644 --- a/java/fury-core/src/main/java/org/apache/fury/reflect/ReflectionUtils.java +++ b/java/fury-core/src/main/java/org/apache/fury/reflect/ReflectionUtils.java @@ -50,6 +50,7 @@ import org.apache.fury.memory.Platform; import org.apache.fury.util.GraalvmSupport; import org.apache.fury.util.Preconditions; +import org.apache.fury.util.StringUtils; import org.apache.fury.util.function.Functions; import org.apache.fury.util.unsafe._JDKAccess; @@ -215,6 +216,25 @@ private static void getAllInterfaces(Class cls, LinkedHashSet> inter } } + /** Get all classes from leaf to {@link Object}. */ + public static List> getAllClasses(Class cls) { + List> classes = new ArrayList<>(); + Class clz = cls; + while (clz != null) { + classes.add(clz); + clz = clz.getSuperclass(); + } + return classes; + } + + public static List> getAllClasses(Class cls, boolean topToLeaf) { + List> classes = getAllClasses(cls); + if (topToLeaf) { + Collections.reverse(classes); + } + return classes; + } + /** Returns true if any method named {@code methodName} has exception. 
*/ public static boolean hasException(Class cls, String methodName) { List methods = findMethods(cls, methodName); @@ -517,12 +537,7 @@ public static String getPackage(Class cls) { // Janino generated class's package might be null if (cls.getPackage() == null) { String className = cls.getName(); - int index = className.lastIndexOf("."); - if (index != -1) { - pkg = className.substring(0, index); - } else { - pkg = ""; - } + return getPackage(className); } else { pkg = cls.getPackage().getName(); } @@ -553,6 +568,22 @@ public static String getCanonicalName(Class cls) { return canonicalName; } + public static String getSimpleClassName(String className) { + int index = className.lastIndexOf("."); + if (index != -1) { + return className.substring(index + 1); + } else { + return className; + } + } + + public static String getFullClassName(String pkg, String className) { + if (StringUtils.isBlank(pkg)) { + return className; + } + return pkg + "." + className; + } + @CodegenInvoke public static Class loadClass(Class neighbor, String className) { try { diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassInfo.java b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassInfo.java index 1ca10cd466..070adb2b20 100644 --- a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassInfo.java +++ b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassInfo.java @@ -19,8 +19,8 @@ package org.apache.fury.resolver; -import static org.apache.fury.meta.MetaStringEncoder.PACKAGE_ENCODER; -import static org.apache.fury.meta.MetaStringEncoder.TYPE_NAME_ENCODER; +import static org.apache.fury.meta.Encoders.PACKAGE_ENCODER; +import static org.apache.fury.meta.Encoders.TYPE_NAME_ENCODER; import org.apache.fury.collection.Tuple2; import org.apache.fury.config.Language; diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java index 0bf8787c30..e44af81090 
100644 --- a/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java +++ b/java/fury-core/src/main/java/org/apache/fury/resolver/ClassResolver.java @@ -19,16 +19,17 @@ package org.apache.fury.resolver; -import static org.apache.fury.meta.MetaStringEncoder.PACKAGE_DECODER; -import static org.apache.fury.meta.MetaStringEncoder.PACKAGE_ENCODER; -import static org.apache.fury.meta.MetaStringEncoder.TYPE_NAME_DECODER; +import static org.apache.fury.collection.Collections.ofHashMap; +import static org.apache.fury.meta.ClassDef.SIZE_TWO_BYTES_FLAG; +import static org.apache.fury.meta.Encoders.PACKAGE_DECODER; +import static org.apache.fury.meta.Encoders.PACKAGE_ENCODER; +import static org.apache.fury.meta.Encoders.TYPE_NAME_DECODER; import static org.apache.fury.serializer.CodegenSerializer.loadCodegenSerializer; import static org.apache.fury.serializer.CodegenSerializer.loadCompatibleCodegenSerializer; import static org.apache.fury.serializer.CodegenSerializer.supportCodegenForJavaSerialization; import static org.apache.fury.type.TypeUtils.OBJECT_TYPE; import static org.apache.fury.type.TypeUtils.getRawType; -import com.google.common.collect.ImmutableMap; import java.io.Externalizable; import java.io.IOException; import java.io.Serializable; @@ -245,6 +246,7 @@ private static class ExtRegistry { private final Set> getClassCtx = new HashSet<>(); private final Map, FieldResolver> fieldResolverMap = new HashMap<>(); private final Map> classIdToDef = new HashMap<>(); + private final Map, ClassDef> currentLayerClassDef = new HashMap<>(); // TODO(chaokunyang) Better to use soft reference, see ObjectStreamClass. 
private final ConcurrentHashMap, Boolean>, SortedMap> descriptorsCache = new ConcurrentHashMap<>(); @@ -480,6 +482,10 @@ public void register(Class cls, Short id, boolean createSerializer) { } } + public boolean isRegistered(Class cls) { + return extRegistry.registeredClassIdMap.get(cls) != null; + } + public Short getRegisteredClassId(Class cls) { return extRegistry.registeredClassIdMap.get(cls); } @@ -1048,6 +1054,7 @@ public boolean needToWriteRef(Class cls) { public ClassInfo getClassInfo(short classId) { ClassInfo classInfo = registeredId2ClassInfo[classId]; + assert classInfo != null : classId; if (classInfo.serializer == null) { addSerializer(classInfo.cls, createSerializer(classInfo.cls)); classInfo = classInfoMap.get(classInfo.cls); @@ -1275,8 +1282,9 @@ public void writeClassWithMetaShare(MemoryBuffer buffer, ClassInfo classInfo) { || serializer instanceof ObjectSerializer || serializer instanceof MetaSharedSerializer)) { classDef = - classDefMap.computeIfAbsent(classInfo.cls, cls -> ClassDef.buildClassDef(cls, fury)); + classDefMap.computeIfAbsent(classInfo.cls, cls -> ClassDef.buildClassDef(fury, cls)); } else { + // TODO(chaokunyang) support more types meta-share serialization classDef = classDefMap.computeIfAbsent( classInfo.cls, @@ -1285,7 +1293,7 @@ public void writeClassWithMetaShare(MemoryBuffer buffer, ClassInfo classInfo) { this, cls, new ArrayList<>(), - ImmutableMap.of(META_SHARE_FIELDS_INFO_KEY, "false"))); + ofHashMap(META_SHARE_FIELDS_INFO_KEY, "false"))); } metaContext.writingClassDefs.add(classDef); } @@ -1339,7 +1347,7 @@ private ClassInfo readClassInfoWithMetaShare(MemoryBuffer buffer, MetaContext me classInfo = getMetaSharedClassInfo(classDef, cls); // Share serializer for same version class def to avoid too much different meta // context take up too much memory. 
- extRegistry.classIdToDef.put(classDef.getId(), Tuple2.of(classDef, classInfo)); + putClassDef(classDef, classInfo); } else { classInfo = classDefTuple.f1; } @@ -1366,6 +1374,9 @@ private ClassInfo getMetaSharedClassInfo(ClassDef classDef, Class clz) { Preconditions.checkNotNull(classId); return classInfo; } + if (clz.isArray() || cls.isEnum()) { + return getClassInfo(cls); + } Class sc = fury.getJITContext() .registerSerializerJITCallback( @@ -1406,13 +1417,19 @@ public void readClassDefs(MemoryBuffer buffer) { buffer.readerIndex(classDefOffset); int numClassDefs = buffer.readVarUint32Small14(); for (int i = 0; i < numClassDefs; i++) { - ClassDef readClassDef = ClassDef.readClassDef(buffer); - // Share same class def to reduce memory footprint, since there may be many meta context. - ClassDef classDef = - extRegistry.classIdToDef.computeIfAbsent( - readClassDef.getId(), key -> Tuple2.of(readClassDef, null)) - .f0; - metaContext.readClassDefs.add(classDef); + long id = buffer.readInt64(); + long hash = id >>> 8; + Tuple2 tuple2 = extRegistry.classIdToDef.get(hash); + if (tuple2 != null) { + int size = + (id & SIZE_TWO_BYTES_FLAG) == 0 + ? buffer.readByte() & 0xff + : buffer.readInt16() & 0xffff; + buffer.increaseReaderIndex(size); + } else { + tuple2 = readClassDef(buffer, id); + } + metaContext.readClassDefs.add(tuple2.f0); // Will be set lazily, so even some classes doesn't exist, remaining classinfo // can be created still. 
metaContext.readClassInfos.add(null); @@ -1420,6 +1437,33 @@ public void readClassDefs(MemoryBuffer buffer) { buffer.readerIndex(readerIndex); } + private Tuple2 readClassDef(MemoryBuffer buffer, long header) { + ClassDef readClassDef = ClassDef.readClassDef(this, buffer, header); + Tuple2 tuple2 = extRegistry.classIdToDef.get(readClassDef.getId()); + if (tuple2 == null) { + tuple2 = putClassDef(readClassDef, null); + } + return tuple2; + } + + private Tuple2 putClassDef(ClassDef classDef, ClassInfo classInfo) { + Tuple2 tuple2 = Tuple2.of(classDef, classInfo); + extRegistry.classIdToDef.put(classDef.getId(), tuple2); + return tuple2; + } + + public ClassDef getClassDef(Class cls, boolean resolveParent) { + if (resolveParent) { + return classDefMap.computeIfAbsent(cls, k -> ClassDef.buildClassDef(fury, cls)); + } + ClassDef classDef = extRegistry.currentLayerClassDef.get(cls); + if (classDef == null) { + classDef = ClassDef.buildClassDef(fury, cls, false); + extRegistry.currentLayerClassDef.put(cls, classDef); + } + return classDef; + } + /** * Native code for ClassResolver.writeClass is too big to inline, so inline it manually. 
* @@ -1487,7 +1531,6 @@ public Class readClassInternal(MemoryBuffer buffer) { MetaStringBytes packageBytes = metaStringResolver.readMetaStringBytesWithFlag(buffer, header); MetaStringBytes simpleClassNameBytes = metaStringResolver.readMetaStringBytes(buffer); classInfo = loadBytesToClassInfo(packageBytes, simpleClassNameBytes); - } else { classInfo = registeredId2ClassInfo[(short) (header >> 1)]; } diff --git a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java index 24f84bdba7..8867a9e789 100644 --- a/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java +++ b/java/fury-core/src/main/java/org/apache/fury/resolver/MetaStringBytes.java @@ -95,9 +95,13 @@ public int hashCode() { @Override public String toString() { - // TODO support other str encoding. - String str = new String(bytes); - ; - return "string: " + str + " " + "size: " + bytes.length + " " + Arrays.toString(bytes); + return "MetaStringBytes{" + + "hashCode=" + + hashCode + + ", size=" + + bytes.length + + ", bytes=" + + Arrays.toString(bytes) + + '}'; } } diff --git a/java/fury-core/src/main/java/org/apache/fury/serializer/MetaSharedSerializer.java b/java/fury-core/src/main/java/org/apache/fury/serializer/MetaSharedSerializer.java index e243089717..87f1a91b61 100644 --- a/java/fury-core/src/main/java/org/apache/fury/serializer/MetaSharedSerializer.java +++ b/java/fury-core/src/main/java/org/apache/fury/serializer/MetaSharedSerializer.java @@ -20,14 +20,9 @@ package org.apache.fury.serializer; import java.lang.invoke.MethodHandle; -import java.lang.reflect.Field; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.SortedMap; import java.util.stream.Collectors; import org.apache.fury.Fury; import org.apache.fury.builder.MetaSharedCodecBuilder; @@ -94,7 +89,11 @@ 
public MetaSharedSerializer(Fury fury, Class type, ClassDef classDef) { Collection descriptors = consolidateFields(fury.getClassResolver(), type, classDef); DescriptorGrouper descriptorGrouper = DescriptorGrouper.createDescriptorGrouper( - descriptors, true, fury.compressInt(), fury.getConfig().compressLong()); + fury.getClassResolver()::isMonomorphic, + descriptors, + false, + fury.compressInt(), + fury.getConfig().compressLong()); // d.getField() may be null if not exists in this class when meta share enabled. isRecord = RecordUtils.isRecord(type); if (isRecord) { @@ -299,25 +298,6 @@ static boolean skipPrimitiveFieldValueFailed(Fury fury, short classId, MemoryBuf */ public static Collection consolidateFields( ClassResolver classResolver, Class cls, ClassDef classDef) { - SortedMap allDescriptorsMap = classResolver.getAllDescriptorsMap(cls, true); - Map descriptorsMap = new HashMap<>(); - for (Map.Entry e : allDescriptorsMap.entrySet()) { - if (descriptorsMap.put( - e.getKey().getDeclaringClass().getName() + "." + e.getKey().getName(), e.getValue()) - != null) { - throw new IllegalStateException("Duplicate key"); - } - } - List descriptors = new ArrayList<>(classDef.getFieldsInfo().size()); - for (ClassDef.FieldInfo fieldInfo : classDef.getFieldsInfo()) { - Descriptor descriptor = - descriptorsMap.get(fieldInfo.getDefinedClass() + "." 
+ fieldInfo.getFieldName()); - if (descriptor != null) { - descriptors.add(descriptor); - } else { - descriptors.add(fieldInfo.toDescriptor(classResolver)); - } - } - return descriptors; + return classDef.getDescriptors(classResolver, cls); } } diff --git a/java/fury-core/src/main/java/org/apache/fury/serializer/ObjectSerializer.java b/java/fury-core/src/main/java/org/apache/fury/serializer/ObjectSerializer.java index 1f61d73467..8e9a2aac92 100644 --- a/java/fury-core/src/main/java/org/apache/fury/serializer/ObjectSerializer.java +++ b/java/fury-core/src/main/java/org/apache/fury/serializer/ObjectSerializer.java @@ -19,6 +19,7 @@ package org.apache.fury.serializer; +import static org.apache.fury.type.DescriptorGrouper.createDescriptorGrouper; import static org.apache.fury.type.TypeUtils.getRawType; import java.lang.invoke.MethodHandle; @@ -34,6 +35,7 @@ import org.apache.fury.exception.FuryException; import org.apache.fury.memory.MemoryBuffer; import org.apache.fury.memory.Platform; +import org.apache.fury.meta.ClassDef; import org.apache.fury.reflect.FieldAccessor; import org.apache.fury.reflect.ReflectionUtils; import org.apache.fury.reflect.TypeRef; @@ -95,11 +97,21 @@ public ObjectSerializer(Fury fury, Class cls, boolean resolveParent) { // Use `setSerializerIfAbsent` to avoid overwriting existing serializer for class when used // as data serializer. 
classResolver.setSerializerIfAbsent(cls, this); - Collection descriptors = - fury.getClassResolver().getAllDescriptorsMap(cls, resolveParent).values(); + Collection descriptors; + boolean shareMeta = fury.getConfig().shareMetaContext(); + if (shareMeta) { + ClassDef classDef = classResolver.getClassDef(cls, resolveParent); + descriptors = classDef.getDescriptors(classResolver, cls); + } else { + descriptors = fury.getClassResolver().getAllDescriptorsMap(cls, resolveParent).values(); + } DescriptorGrouper descriptorGrouper = - DescriptorGrouper.createDescriptorGrouper( - descriptors, false, fury.compressInt(), fury.compressLong()); + createDescriptorGrouper( + fury.getClassResolver()::isMonomorphic, + descriptors, + false, + fury.compressInt(), + fury.compressLong()); isRecord = RecordUtils.isRecord(cls); if (isRecord) { constructor = RecordUtils.getRecordConstructor(cls).f1; diff --git a/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java b/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java index ce5e144793..2e26096e3a 100644 --- a/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java +++ b/java/fury-core/src/main/java/org/apache/fury/serializer/StringSerializer.java @@ -231,7 +231,7 @@ public String readBytesString(MemoryBuffer buffer) { byte[] heapMemory = buffer.getHeapMemory(); if (heapMemory != null) { final int arrIndex = buffer._unsafeHeapReaderIndex(); - buffer._increaseReaderIndexUnsafe(numBytes); + buffer.increaseReaderIndex(numBytes); bytes = new byte[numBytes]; System.arraycopy(heapMemory, arrIndex, bytes, 0, numBytes); } else { diff --git a/java/fury-core/src/main/java/org/apache/fury/serializer/UnexistedClassSerializers.java b/java/fury-core/src/main/java/org/apache/fury/serializer/UnexistedClassSerializers.java index 44864e2db1..fe469982cc 100644 --- a/java/fury-core/src/main/java/org/apache/fury/serializer/UnexistedClassSerializers.java +++ 
b/java/fury-core/src/main/java/org/apache/fury/serializer/UnexistedClassSerializers.java @@ -185,7 +185,11 @@ private ClassFieldsInfo getClassFieldsInfo(ClassDef classDef) { fury.getClassResolver(), UnexistedSkipClass.class, classDef); DescriptorGrouper descriptorGrouper = DescriptorGrouper.createDescriptorGrouper( - descriptors, true, fury.compressInt(), fury.compressLong()); + fury.getClassResolver()::isMonomorphic, + descriptors, + false, + fury.compressInt(), + fury.compressLong()); Tuple3< Tuple2, ObjectSerializer.GenericTypeField[], diff --git a/java/fury-core/src/main/java/org/apache/fury/type/Descriptor.java b/java/fury-core/src/main/java/org/apache/fury/type/Descriptor.java index 3b6e683558..8c3f4b8f5d 100644 --- a/java/fury-core/src/main/java/org/apache/fury/type/Descriptor.java +++ b/java/fury-core/src/main/java/org/apache/fury/type/Descriptor.java @@ -70,8 +70,13 @@ public static void clearDescriptorCache() { descCache = CacheBuilder.newBuilder().weakKeys().softValues().concurrencyLevel(64).build(); } + // No field should be mutable except for lazy loading, + // because Descriptor is cached in `descCache`. + // Mutable fields may make some serializers read a wrong field + // value, such as `typeName`.
private TypeRef typeRef; private Class type; + private final String typeName; private final String name; private final int modifier; private final String declaringClass; @@ -81,6 +86,7 @@ public static void clearDescriptorCache() { public Descriptor(Field field, TypeRef typeRef, Method readMethod, Method writeMethod) { this.field = field; + this.typeName = field.getType().getName(); this.name = field.getName(); this.modifier = field.getModifiers(); this.declaringClass = field.getDeclaringClass().getName(); @@ -91,6 +97,7 @@ public Descriptor(Field field, TypeRef typeRef, Method readMethod, Method wri public Descriptor(TypeRef typeRef, String name, int modifier, String declaringClass) { this.field = null; + this.typeName = typeRef.getRawType().getName(); this.name = name; this.modifier = modifier; this.declaringClass = declaringClass; @@ -101,6 +108,7 @@ public Descriptor(TypeRef typeRef, String name, int modifier, String declarin private Descriptor(Field field, Method readMethod) { this.field = field; + this.typeName = field.getType().getName(); this.name = field.getName(); this.modifier = field.getModifiers(); this.declaringClass = field.getDeclaringClass().getName(); @@ -111,6 +119,7 @@ private Descriptor(Field field, Method readMethod) { private Descriptor( TypeRef typeRef, + String typeName, String name, int modifier, String declaringClass, @@ -118,6 +127,7 @@ private Descriptor( Method readMethod, Method writeMethod) { this.typeRef = typeRef; + this.typeName = typeName; this.name = name; this.modifier = modifier; this.declaringClass = declaringClass; @@ -126,8 +136,14 @@ private Descriptor( this.writeMethod = writeMethod; } - public Descriptor copy(TypeRef typeRef, Method readMethod, Method writeMethod) { - return new Descriptor(typeRef, name, modifier, declaringClass, field, readMethod, writeMethod); + public Descriptor copy(Method readMethod, Method writeMethod) { + return new Descriptor( + typeRef, typeName, name, modifier, declaringClass, field, 
readMethod, writeMethod); + } + + public Descriptor copyWithTypeName(String typeName) { + return new Descriptor( + typeRef, typeName, name, modifier, declaringClass, field, readMethod, writeMethod); } public Field getField() { @@ -158,6 +174,10 @@ public Method getWriteMethod() { return writeMethod; } + public String getTypeName() { + return typeName; + } + /** Try not use {@link TypeRef#getRawType()} since it's expensive. */ public Class getRawType() { Class type = this.type; @@ -182,11 +202,21 @@ public TypeRef getTypeRef() { @Override public String toString() { final StringBuilder sb = new StringBuilder("Descriptor{"); - sb.append("name=").append(name); - sb.append(", field=").append(field); - sb.append(", readMethod=").append(readMethod); - sb.append(", writeMethod=").append(writeMethod); - sb.append(", typeToken=").append(typeRef); + // Separate every entry after the first with ", " so adjacent values don't run together. + sb.append("typeName=").append(typeName); + sb.append(", name=").append(name); + sb.append(", modifier=").append(modifier); + if (field != null) { + // Render the field as DeclaringClass.fieldName instead of a dangling "DeclaringClass.". + sb.append(", field=").append(field.getDeclaringClass().getSimpleName()); + sb.append('.').append(field.getName()); + } + if (readMethod != null) { + sb.append(", readMethod=").append(readMethod); + } + if (writeMethod != null) { + sb.append(", writeMethod=").append(writeMethod); + } sb.append('}'); return sb.toString(); } diff --git a/java/fury-core/src/main/java/org/apache/fury/type/DescriptorGrouper.java b/java/fury-core/src/main/java/org/apache/fury/type/DescriptorGrouper.java index 0308422c27..ca9d8a1991 100644 --- a/java/fury-core/src/main/java/org/apache/fury/type/DescriptorGrouper.java +++ b/java/fury-core/src/main/java/org/apache/fury/type/DescriptorGrouper.java @@ -28,7 +28,7 @@ import java.util.List; import java.util.TreeSet; import java.util.function.Function; -import org.apache.fury.reflect.ReflectionUtils; +import java.util.function.Predicate; import org.apache.fury.util.record.RecordUtils; /** @@ -104,12 +104,11 @@ private static boolean isCompressedType(Class cls, boolean compressInt, boole // sort by field name to fix order if type is
same. int c = d1 - // Use raw type instead of generic type so that fields with type token + // Use type name instead of generic type so that fields with type ref // constructed in ClassDef which take pojo as non-final Object type // will have consistent order between processes if the fields doesn't exist in peer. - .getRawType() - .getName() - .compareTo(d2.getRawType().getName()); + .getTypeName() + .compareTo(d2.getTypeName()); if (c == 0) { c = d1.getName().compareTo(d2.getName()); if (c == 0) { @@ -138,6 +137,7 @@ private static boolean isCompressedType(Class cls, boolean compressInt, boole * @param comparator comparator for non-primitive fields. */ public DescriptorGrouper( + Predicate> isMonomorphic, Collection descriptors, boolean descriptorsGroupedOrdered, Function descriptorUpdator, @@ -163,7 +163,7 @@ public DescriptorGrouper( collectionDescriptors.add(descriptorUpdator.apply(descriptor)); } else if (TypeUtils.isMap(descriptor.getRawType())) { mapDescriptors.add(descriptorUpdator.apply(descriptor)); - } else if (ReflectionUtils.isMonomorphic(descriptor.getRawType())) { + } else if (isMonomorphic.test(descriptor.getRawType())) { finalDescriptors.add(descriptorUpdator.apply(descriptor)); } else { otherDescriptors.add(descriptorUpdator.apply(descriptor)); @@ -215,16 +215,18 @@ private static Descriptor createDescriptor(Descriptor d) { if (readMethod == null && d.getWriteMethod() == null) { return d; } - return d.copy(d.getTypeRef(), readMethod, null); + return d.copy(readMethod, null); } public static DescriptorGrouper createDescriptorGrouper( + Predicate> isMonomorphic, Collection descriptors, boolean descriptorsGroupedOrdered, boolean compressInt, boolean compressLong) { Comparator comparator = getPrimitiveComparator(compressInt, compressLong); return new DescriptorGrouper( + isMonomorphic, descriptors, descriptorsGroupedOrdered, DescriptorGrouper::createDescriptor, diff --git a/java/fury-core/src/main/java/org/apache/fury/type/GenericType.java 
b/java/fury-core/src/main/java/org/apache/fury/type/GenericType.java index 81e2d3e8ce..86202fea73 100644 --- a/java/fury-core/src/main/java/org/apache/fury/type/GenericType.java +++ b/java/fury-core/src/main/java/org/apache/fury/type/GenericType.java @@ -214,8 +214,4 @@ public boolean hasGenericParameters() { public String toString() { return "GenericType{" + typeRef.toString() + '}'; } - - public static boolean isFinalByDefault(Class cls) { - return defaultFinalPredicate.test(cls); - } } diff --git a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties index a71070b1e1..8a14afa927 100644 --- a/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties +++ b/java/fury-core/src/main/resources/META-INF/native-image/org.apache.fury/fury-core/native-image.properties @@ -166,6 +166,7 @@ Args=--initialize-at-build-time=org.apache.fury.memory.MemoryBuffer,\ org.apache.fury.meta.MetaStringDecoder$1,\ org.apache.fury.meta.MetaStringEncoder$1,\ org.apache.fury.meta.MetaString$Encoding,\ + org.apache.fury.meta.Encoders,\ org.apache.fury.reflect.Types$ClassOwnership,\ org.apache.fury.reflect.Types$ClassOwnership$1,\ org.apache.fury.reflect.Types$ClassOwnership$2,\ diff --git a/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefEncoderTest.java b/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefEncoderTest.java new file mode 100644 index 0000000000..6baa2149a5 --- /dev/null +++ b/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefEncoderTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fury.meta; + +import static org.apache.fury.meta.ClassDefEncoder.buildFieldsInfo; +import static org.apache.fury.meta.ClassDefEncoder.getClassFields; + +import java.util.HashMap; +import java.util.List; +import org.apache.fury.Fury; +import org.apache.fury.memory.MemoryBuffer; +import org.apache.fury.test.bean.BeanA; +import org.apache.fury.test.bean.MapFields; +import org.apache.fury.test.bean.Struct; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class ClassDefEncoderTest { + + @Test + public void testBasicClassDef() throws Exception { + Fury fury = Fury.builder().withMetaContextShare(true).build(); + Class type = ClassDefTest.TestFieldsOrderClass1.class; + List fieldsInfo = buildFieldsInfo(fury.getClassResolver(), type); + MemoryBuffer buffer = + ClassDefEncoder.encodeClassDef( + fury.getClassResolver(), type, getClassFields(type, fieldsInfo), new HashMap<>()); + ClassDef classDef = ClassDef.readClassDef(fury.getClassResolver(), buffer); + Assert.assertEquals(classDef.getClassName(), type.getName()); + Assert.assertEquals(classDef.getFieldsInfo().size(), type.getDeclaredFields().length); + Assert.assertEquals(classDef.getFieldsInfo(), fieldsInfo); + } + + @Test + public void testBigMetaEncoding() { + for (Class type : + new Class[] { + MapFields.class, BeanA.class, Struct.createStructClass("TestBigMetaEncoding", 5) + }) { + Fury 
fury = Fury.builder().withMetaContextShare(true).build(); + ClassDef classDef = ClassDef.buildClassDef(fury, type); + ClassDef classDef1 = + ClassDef.readClassDef( + fury.getClassResolver(), MemoryBuffer.fromByteArray(classDef.getEncoded())); + Assert.assertEquals(classDef1, classDef); + } + } +} diff --git a/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefTest.java b/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefTest.java index d44b7bad62..45425d78dd 100644 --- a/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefTest.java +++ b/java/fury-core/src/test/java/org/apache/fury/meta/ClassDefTest.java @@ -36,7 +36,7 @@ import org.testng.annotations.Test; public class ClassDefTest extends FuryTestBase { - private static class TestFieldsOrderClass1 { + static class TestFieldsOrderClass1 { private int intField2; private boolean booleanField; private Object objField; @@ -94,7 +94,7 @@ public void testClassDefSerialization() throws NoSuchFieldException { ImmutableList.of(TestFieldsOrderClass1.class.getDeclaredField("longField"))); MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); classDef.writeClassDef(buffer); - ClassDef classDef1 = ClassDef.readClassDef(buffer); + ClassDef classDef1 = ClassDef.readClassDef(fury.getClassResolver(), buffer); assertEquals(classDef1.getClassName(), classDef.getClassName()); assertEquals(classDef1, classDef); } @@ -110,7 +110,7 @@ public void testClassDefSerialization() throws NoSuchFieldException { ReflectionUtils.getFields(TestFieldsOrderClass1.class, true).size()); MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); classDef.writeClassDef(buffer); - ClassDef classDef1 = ClassDef.readClassDef(buffer); + ClassDef classDef1 = ClassDef.readClassDef(fury.getClassResolver(), buffer); assertEquals(classDef1.getClassName(), classDef.getClassName()); assertEquals(classDef1, classDef); } @@ -126,7 +126,7 @@ public void testClassDefSerialization() throws NoSuchFieldException { 
ReflectionUtils.getFields(TestFieldsOrderClass2.class, true).size()); MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); classDef.writeClassDef(buffer); - ClassDef classDef1 = ClassDef.readClassDef(buffer); + ClassDef classDef1 = ClassDef.readClassDef(fury.getClassResolver(), buffer); assertEquals(classDef1.getClassName(), classDef.getClassName()); assertEquals(classDef1, classDef); } @@ -147,7 +147,7 @@ public void testDuplicateFieldsClass() { ReflectionUtils.getFields(DuplicateFieldClass.class, true).size()); MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); classDef.writeClassDef(buffer); - ClassDef classDef1 = ClassDef.readClassDef(buffer); + ClassDef classDef1 = ClassDef.readClassDef(fury.getClassResolver(), buffer); assertEquals(classDef1.getClassName(), classDef.getClassName()); assertEquals(classDef1, classDef); } @@ -156,21 +156,15 @@ public void testDuplicateFieldsClass() { @Test public void testContainerClass() { Fury fury = Fury.builder().withMetaContextShare(true).build(); - { - ClassDef classDef = - ClassDef.buildClassDef( - fury.getClassResolver(), - ContainerClass.class, - ReflectionUtils.getFields(ContainerClass.class, true)); - assertEquals(classDef.getClassName(), ContainerClass.class.getName()); - assertEquals( - classDef.getFieldsInfo().size(), - ReflectionUtils.getFields(ContainerClass.class, true).size()); - MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); - classDef.writeClassDef(buffer); - ClassDef classDef1 = ClassDef.readClassDef(buffer); - assertEquals(classDef1.getClassName(), classDef.getClassName()); - assertEquals(classDef1, classDef); - } + List fields = ReflectionUtils.getFields(ContainerClass.class, true); + ClassDef classDef = + ClassDef.buildClassDef(fury.getClassResolver(), ContainerClass.class, fields); + assertEquals(classDef.getClassName(), ContainerClass.class.getName()); + assertEquals(classDef.getFieldsInfo().size(), fields.size()); + MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + 
classDef.writeClassDef(buffer); + ClassDef classDef1 = ClassDef.readClassDef(fury.getClassResolver(), buffer); + assertEquals(classDef1.getClassName(), classDef.getClassName()); + assertEquals(classDef1, classDef); } } diff --git a/java/fury-core/src/test/java/org/apache/fury/type/DescriptorGrouperTest.java b/java/fury-core/src/test/java/org/apache/fury/type/DescriptorGrouperTest.java index 7eebdef093..d8c7be439f 100644 --- a/java/fury-core/src/test/java/org/apache/fury/type/DescriptorGrouperTest.java +++ b/java/fury-core/src/test/java/org/apache/fury/type/DescriptorGrouperTest.java @@ -31,6 +31,7 @@ import java.util.Map; import java.util.Random; import java.util.stream.Collectors; +import org.apache.fury.reflect.ReflectionUtils; import org.apache.fury.reflect.TypeRef; import org.testng.annotations.Test; @@ -149,7 +150,8 @@ public void testGrouper() { descriptors.add( new Descriptor(new TypeRef>() {}, "c" + index++, -1, "TestClass")); DescriptorGrouper grouper = - DescriptorGrouper.createDescriptorGrouper(descriptors, false, false, false); + DescriptorGrouper.createDescriptorGrouper( + ReflectionUtils::isMonomorphic, descriptors, false, false, false); { List> classes = grouper.getPrimitiveDescriptors().stream() @@ -224,7 +226,8 @@ public void testGrouper() { @Test public void testCompressedPrimitiveGrouper() { DescriptorGrouper grouper = - DescriptorGrouper.createDescriptorGrouper(createDescriptors(), false, true, true); + DescriptorGrouper.createDescriptorGrouper( + ReflectionUtils::isMonomorphic, createDescriptors(), false, true, true); { List> classes = grouper.getPrimitiveDescriptors().stream()