-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[HUDI-7995] MercifulJsonConverter support convert number to fixed #11637
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,13 +23,17 @@ | |
import org.apache.hudi.exception.HoodieIOException; | ||
|
||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import org.apache.avro.LogicalTypes; | ||
import org.apache.avro.Schema; | ||
import org.apache.avro.Schema.Type; | ||
import org.apache.avro.generic.GenericData; | ||
import org.apache.avro.generic.GenericFixed; | ||
import org.apache.avro.generic.GenericRecord; | ||
|
||
import java.io.IOException; | ||
import java.io.Serializable; | ||
import java.math.BigDecimal; | ||
import java.math.RoundingMode; | ||
import java.nio.ByteBuffer; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
|
@@ -38,6 +42,7 @@ | |
import java.util.Map; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
|
||
import static org.apache.hudi.avro.HoodieAvroUtils.DECIMAL_CONVERSION; | ||
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes; | ||
|
||
/** | ||
|
@@ -301,6 +306,14 @@ private static JsonToAvroFieldProcessor generateFixedTypeHandler() { | |
return new JsonToAvroFieldProcessor() { | ||
@Override | ||
public Pair<Boolean, Object> convert(Object value, String name, Schema schema, boolean shouldSanitize, String invalidCharMask) { | ||
// the value can be Number | ||
if (value instanceof Number) { | ||
LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) schema.getLogicalType(); | ||
BigDecimal bigDecimal = new java.math.BigDecimal(value.toString()).setScale(decimalType.getScale(), RoundingMode.HALF_UP); | ||
GenericFixed genericFixed = DECIMAL_CONVERSION.toFixed(bigDecimal, schema, schema.getLogicalType()); | ||
return Pair.of(true, new GenericData.Fixed(schema, genericFixed.bytes())); | ||
} | ||
|
||
// The ObjectMapper use List to represent FixedType | ||
// eg: "decimal_val": [0, 0, 14, -63, -52] will convert to ArrayList<Integer> | ||
List<Integer> converval = (List<Integer>) value; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hi, do you know why this value is a list? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The Decimal type uses a byte array to store data internally There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks, can you provide a JSON example? and add it to ut for make ut more complete There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. already added a json example in ut There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in ut, you directly specified the list, is should be meaningless, List should use Array Type. I don't know why here value can be List type? I am not sure, does |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,17 +20,24 @@ | |
|
||
import org.apache.hudi.common.testutils.SchemaTestUtil; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import com.fasterxml.jackson.databind.ObjectMapper; | ||
import org.apache.avro.Conversions; | ||
import org.apache.avro.Conversions.DecimalConversion; | ||
import org.apache.avro.Schema; | ||
import org.apache.avro.generic.GenericData; | ||
import org.apache.avro.generic.GenericData.Fixed; | ||
import org.apache.avro.generic.GenericRecord; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.io.IOException; | ||
import java.math.BigDecimal; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
|
||
public class TestMercifulJsonConverter { | ||
private static final ObjectMapper MAPPER = new ObjectMapper(); | ||
private static final MercifulJsonConverter CONVERTER = new MercifulJsonConverter(true,"__"); | ||
|
@@ -99,4 +106,44 @@ public void conversionWithFieldNameAliases() throws IOException { | |
|
||
Assertions.assertEquals(rec, CONVERTER.convert(json, sanitizedSchema)); | ||
} | ||
|
||
@Test | ||
public void testConvertNumberToFixed() throws IOException { | ||
String testSchemaStr = "{\"type\": \"record\",\"name\": \"test_record\",\"namespace\": \"test_namespace\",\"fields\": " | ||
+ "[{\"name\": \"decimal_field\",\"type\": [\"null\",{\"type\": \"fixed\",\"name\": \"fixed\",\"namespace\": \"test_namespace.decimal_field\",\"size\": 9," | ||
+ " \"logicalType\": \"decimal\",\"precision\": 20,\"scale\": 0}],\"default\": null}," | ||
+ "{\"name\": \"decimal2_field\",\"type\": [\"null\",{\"type\": \"fixed\",\"name\": \"fixed\",\"namespace\": \"test_namespace.decimal2_field\",\"size\": 9," | ||
+ " \"logicalType\": \"decimal\",\"precision\": 20,\"scale\": 2}],\"default\": null}," | ||
+ "{\"name\": \"decimal3_field\",\"type\": [\"null\",{\"type\": \"fixed\",\"name\": \"fixed\",\"namespace\": \"test_namespace.decimal3_field\",\"size\": 9," | ||
+ " \"logicalType\": \"decimal\",\"precision\": 20,\"scale\": 2}],\"default\": null}," | ||
+ "{\"name\": \"int_field\",\"type\": [\"null\",\"int\"],\"default\": null}," | ||
+ "{\"name\": \"long_field\",\"type\": [\"null\",\"long\"],\"default\": null}," | ||
+ "{\"name\": \"string_field\",\"type\": [\"null\",\"string\"],\"default\": null}]}"; | ||
Schema schema = Schema.parse(testSchemaStr); | ||
|
||
String testValueStr = "{\n" | ||
+ " \"decimal_field\": 1720623716,\n" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the schema type is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because avro store Decimal type as Fixed type internally There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the decimal type is |
||
+ " \"decimal2_field\": 1720623716.23,\n" | ||
+ " \"decimal3_field\": [0, 0, 0, 0, 40, 15, -73, 111, 39],\n" | ||
+ " \"int_field\": 1720623716,\n" | ||
+ " \"long_field\": 1720623716,\n" | ||
+ " \"string_field\": \"STRING040467046577\"\n" | ||
+ "}"; | ||
|
||
GenericRecord record = CONVERTER.convert(testValueStr, schema); | ||
ObjectMapper objectMapper = new ObjectMapper(); | ||
JsonNode root = objectMapper.readTree(testValueStr); | ||
|
||
assertEquals(root.get("decimal_field").asLong(), convertFixedToDecimal((Fixed) record.get("decimal_field")).longValue()); | ||
assertEquals(root.get("decimal2_field").asDouble(), convertFixedToDecimal((Fixed) record.get("decimal2_field")).doubleValue()); | ||
|
||
Fixed testFixedValue = new Fixed(((Fixed) record.get("decimal3_field")).getSchema(), new byte[] {0, 0, 0, 0, 40, 15, -73, 111, 39}); | ||
assertEquals(1720623716.23, convertFixedToDecimal(testFixedValue).doubleValue()); | ||
assertEquals(testFixedValue, record.get("decimal3_field")); | ||
} | ||
|
||
private BigDecimal convertFixedToDecimal(GenericData.Fixed fixedRecord) { | ||
DecimalConversion decimalConversion = new Conversions.DecimalConversion(); | ||
return decimalConversion.fromFixed(fixedRecord, fixedRecord.getSchema(), fixedRecord.getSchema().getLogicalType()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why number type will use Fixed type schema?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In my case, I dump mysql binlog to kafka via debezium.
There is a Decimal(20, 0) type field and a Decimal(20, 2) field in the mysql source table, when dumping to kafka, the 2 fields are both stored as number instead of list of Integer, eg 1234567 for Decimal(20, 0), 1234567.89 for Decimal(20, 2).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, so I think you avro schema
decimal_field
should not befixed
, should be float or doubleThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, in my case, the decimal(20, 0) is for bigint and decimal(20, 2) for double, So the debezium decode as I expected.
And the hudi JdbcSchemaProvider convert the mysql table schema as expected to Decimal(20, 0) and Decimal(20, 2).
But when hudi try decoding the bigint and double field from kafka, it throw exception which is unexpected.
the source table schema like
tranlated avro schema like
The bigint(20) is tranlated to Decimal(20, 0), but the data in kafka is just long, not the list of Integer, so the json convert throw below exception.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
got it.