Skip to content

Commit 6e9168b

Browse files
Test fixes
1 parent 735d3c1 commit 6e9168b

File tree

2 files changed

+83
-105
lines changed

2 files changed

+83
-105
lines changed

adapter/avro/src/test/java/org/apache/arrow/adapter/avro/ArrowToAvroDataTest.java

Lines changed: 1 addition & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -2872,8 +2872,7 @@ public void testWriteDictEnumEncoded() throws Exception {
28722872
// Write an AVRO block using the producer classes
28732873
try (FileOutputStream fos = new FileOutputStream(dataFile)) {
28742874
BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(fos, null);
2875-
CompositeAvroProducer producer =
2876-
ArrowToAvroUtils.createCompositeProducer(vectors, dictionaries);
2875+
CompositeAvroProducer producer = ArrowToAvroUtils.createCompositeProducer(vectors, dictionaries);
28772876
for (int row = 0; row < rowCount; row++) {
28782877
producer.produce(encoder);
28792878
}
@@ -2898,107 +2897,4 @@ record = datumReader.read(record, decoder);
28982897
}
28992898
}
29002899
}
2901-
2902-
@Test
2903-
public void testWriteEnumDecoded() throws Exception {
2904-
2905-
// Dict encoded fields that are not valid Avro enums should be decoded on write
2906-
2907-
BufferAllocator allocator = new RootAllocator();
2908-
2909-
// Create a dictionary
2910-
FieldType dictionaryField = new FieldType(false, new ArrowType.Utf8(), null);
2911-
VarCharVector dictionaryVector =
2912-
new VarCharVector(new Field("dictionary", dictionaryField, null), allocator);
2913-
2914-
dictionaryVector.allocateNew(3);
2915-
dictionaryVector.set(0, "passion fruit".getBytes()); // spaces not allowed
2916-
dictionaryVector.set(1, "banana".getBytes());
2917-
dictionaryVector.set(2, "cherry".getBytes());
2918-
dictionaryVector.setValueCount(3);
2919-
2920-
Dictionary dictionary =
2921-
new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
2922-
2923-
FieldType dictionaryField2 = new FieldType(false, new ArrowType.Int(64, true), null);
2924-
BigIntVector dictionaryVector2 =
2925-
new BigIntVector(new Field("dictionary2", dictionaryField2, null), allocator);
2926-
2927-
dictionaryVector2.allocateNew(3);
2928-
dictionaryVector2.set(0, 0L);
2929-
dictionaryVector2.set(1, 1L);
2930-
dictionaryVector2.set(2, 2L);
2931-
dictionaryVector2.setValueCount(3);
2932-
2933-
Dictionary dictionary2 =
2934-
new Dictionary(dictionaryVector2, new DictionaryEncoding(2L, false, null));
2935-
2936-
DictionaryProvider dictionaries =
2937-
new DictionaryProvider.MapDictionaryProvider(dictionary, dictionary2);
2938-
2939-
// Field definition
2940-
FieldType stringField = new FieldType(false, new ArrowType.Utf8(), null);
2941-
VarCharVector stringVector =
2942-
new VarCharVector(new Field("enumField", stringField, null), allocator);
2943-
stringVector.allocateNew(10);
2944-
stringVector.setSafe(0, "passion fruit".getBytes());
2945-
stringVector.setSafe(1, "banana".getBytes());
2946-
stringVector.setSafe(2, "cherry".getBytes());
2947-
stringVector.setSafe(3, "cherry".getBytes());
2948-
stringVector.setSafe(4, "passion fruit".getBytes());
2949-
stringVector.setSafe(5, "banana".getBytes());
2950-
stringVector.setSafe(6, "passion fruit".getBytes());
2951-
stringVector.setSafe(7, "cherry".getBytes());
2952-
stringVector.setSafe(8, "banana".getBytes());
2953-
stringVector.setSafe(9, "passion fruit".getBytes());
2954-
stringVector.setValueCount(10);
2955-
2956-
FieldType longField = new FieldType(false, new ArrowType.Int(64, true), null);
2957-
BigIntVector longVector = new BigIntVector(new Field("enumField2", longField, null), allocator);
2958-
longVector.allocateNew(10);
2959-
for (int i = 0; i < 10; i++) {
2960-
longVector.setSafe(i, (long) i % 3);
2961-
}
2962-
longVector.setValueCount(10);
2963-
2964-
IntVector encodedVector = (IntVector) DictionaryEncoder.encode(stringVector, dictionary);
2965-
IntVector encodedVector2 = (IntVector) DictionaryEncoder.encode(longVector, dictionary2);
2966-
2967-
// Set up VSR
2968-
List<FieldVector> vectors = Arrays.asList(encodedVector, encodedVector2);
2969-
int rowCount = 10;
2970-
2971-
try (VectorSchemaRoot root = new VectorSchemaRoot(vectors)) {
2972-
2973-
File dataFile = new File(TMP, "testWriteEnumDecodedavro");
2974-
2975-
// Write an AVRO block using the producer classes
2976-
try (FileOutputStream fos = new FileOutputStream(dataFile)) {
2977-
BinaryEncoder encoder = new EncoderFactory().directBinaryEncoder(fos, null);
2978-
CompositeAvroProducer producer =
2979-
ArrowToAvroUtils.createCompositeProducer(vectors, dictionaries);
2980-
for (int row = 0; row < rowCount; row++) {
2981-
producer.produce(encoder);
2982-
}
2983-
encoder.flush();
2984-
}
2985-
2986-
// Set up reading the AVRO block as a GenericRecord
2987-
Schema schema = ArrowToAvroUtils.createAvroSchema(root.getSchema().getFields(), dictionaries);
2988-
GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
2989-
2990-
try (InputStream inputStream = new FileInputStream(dataFile)) {
2991-
2992-
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
2993-
GenericRecord record = null;
2994-
2995-
// Read and check values
2996-
for (int row = 0; row < rowCount; row++) {
2997-
record = datumReader.read(record, decoder);
2998-
assertEquals(stringVector.getObject(row).toString(), record.get("enumField").toString());
2999-
assertEquals(longVector.getObject(row), record.get("enumField2"));
3000-
}
3001-
}
3002-
}
3003-
}
30042900
}

adapter/avro/src/test/java/org/apache/arrow/adapter/avro/ArrowToAvroSchemaTest.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
package org.apache.arrow.adapter.avro;
1818

1919
import static org.junit.jupiter.api.Assertions.assertEquals;
20+
import static org.junit.jupiter.api.Assertions.assertThrows;
2021

2122
import java.util.Arrays;
2223
import java.util.List;
2324
import org.apache.arrow.memory.BufferAllocator;
2425
import org.apache.arrow.memory.RootAllocator;
26+
import org.apache.arrow.vector.BigIntVector;
2527
import org.apache.arrow.vector.VarCharVector;
2628
import org.apache.arrow.vector.dictionary.Dictionary;
2729
import org.apache.arrow.vector.dictionary.DictionaryProvider;
@@ -1437,4 +1439,84 @@ public void testWriteDictEnumEncoded() {
14371439
assertEquals("banana", enumField.schema().getEnumSymbols().get(1));
14381440
assertEquals("cherry", enumField.schema().getEnumSymbols().get(2));
14391441
}
1442+
1443+
@Test
1444+
public void testWriteDictEnumInvalid() {
1445+
1446+
BufferAllocator allocator = new RootAllocator();
1447+
1448+
// Create a dictionary
1449+
FieldType dictionaryField = new FieldType(false, new ArrowType.Utf8(), null);
1450+
VarCharVector dictionaryVector =
1451+
new VarCharVector(new Field("dictionary", dictionaryField, null), allocator);
1452+
1453+
dictionaryVector.allocateNew(3);
1454+
dictionaryVector.set(0, "passion fruit".getBytes());
1455+
dictionaryVector.set(1, "banana".getBytes());
1456+
dictionaryVector.set(2, "cherry".getBytes());
1457+
dictionaryVector.setValueCount(3);
1458+
1459+
Dictionary dictionary =
1460+
new Dictionary(
1461+
dictionaryVector, new DictionaryEncoding(0L, false, new ArrowType.Int(8, true)));
1462+
DictionaryProvider dictionaries = new DictionaryProvider.MapDictionaryProvider(dictionary);
1463+
1464+
List<Field> fields =
1465+
Arrays.asList(
1466+
new Field(
1467+
"enumField",
1468+
new FieldType(false, new ArrowType.Int(8, true), dictionary.getEncoding(), null),
1469+
null));
1470+
1471+
// Dictionary field contains values that are not valid enums
1472+
// Should be decoded and output as a string field
1473+
1474+
Schema schema = ArrowToAvroUtils.createAvroSchema(fields, "TestRecord", null, dictionaries);
1475+
1476+
assertEquals(Schema.Type.RECORD, schema.getType());
1477+
assertEquals(1, schema.getFields().size());
1478+
1479+
Schema.Field enumField = schema.getField("enumField");
1480+
assertEquals(Schema.Type.STRING, enumField.schema().getType());
1481+
}
1482+
1483+
@Test
1484+
public void testWriteDictEnumInvalid2() {
1485+
1486+
BufferAllocator allocator = new RootAllocator();
1487+
1488+
// Create a dictionary
1489+
FieldType dictionaryField = new FieldType(false, new ArrowType.Int(64, true), null);
1490+
BigIntVector dictionaryVector =
1491+
new BigIntVector(new Field("dictionary", dictionaryField, null), allocator);
1492+
1493+
dictionaryVector.allocateNew(3);
1494+
dictionaryVector.set(0, 123L);
1495+
dictionaryVector.set(1, 456L);
1496+
dictionaryVector.set(2, 789L);
1497+
dictionaryVector.setValueCount(3);
1498+
1499+
Dictionary dictionary =
1500+
new Dictionary(
1501+
dictionaryVector, new DictionaryEncoding(0L, false, new ArrowType.Int(8, true)));
1502+
DictionaryProvider dictionaries = new DictionaryProvider.MapDictionaryProvider(dictionary);
1503+
1504+
List<Field> fields =
1505+
Arrays.asList(
1506+
new Field(
1507+
"enumField",
1508+
new FieldType(false, new ArrowType.Int(8, true), dictionary.getEncoding(), null),
1509+
null));
1510+
1511+
// Dictionary field encodes LONG values rather than STRING
1512+
// Should be decoded and output as a LONG field
1513+
1514+
Schema schema = ArrowToAvroUtils.createAvroSchema(fields, "TestRecord", null, dictionaries);
1515+
1516+
assertEquals(Schema.Type.RECORD, schema.getType());
1517+
assertEquals(1, schema.getFields().size());
1518+
1519+
Schema.Field enumField = schema.getField("enumField");
1520+
assertEquals(Schema.Type.LONG, enumField.schema().getType());
1521+
}
14401522
}

0 commit comments

Comments
 (0)