Skip to content
78 changes: 72 additions & 6 deletions data/src/test/java/org/apache/iceberg/data/RandomGenericData.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
Expand All @@ -46,13 +48,37 @@ public class RandomGenericData {
private RandomGenericData() {}

public static List<Record> generate(Schema schema, int numRecords, long seed) {
RandomRecordGenerator generator = new RandomRecordGenerator(seed);
List<Record> records = Lists.newArrayListWithExpectedSize(numRecords);
for (int i = 0; i < numRecords; i += 1) {
records.add((Record) TypeUtil.visit(schema, generator));
}
return Lists.newArrayList(generateIcebergGenerics(schema, numRecords, () -> new RandomRecordGenerator(seed)));
}

return records;
public static Iterable<Record> generateFallbackRecords(Schema schema, int numRecords, long seed, long numDictRows) {
return generateIcebergGenerics(schema, numRecords, () -> new FallbackGenerator(seed, numDictRows));
}

public static Iterable<Record> generateDictionaryEncodableRecords(Schema schema, int numRecords, long seed) {
return generateIcebergGenerics(schema, numRecords, () -> new DictionaryEncodedGenerator(seed));
}

private static Iterable<Record> generateIcebergGenerics(Schema schema, int numRecords,
Supplier<RandomDataGenerator<Record>> supplier) {
return () -> new Iterator<Record>() {
private final RandomDataGenerator<Record> generator = supplier.get();
private int count = 0;

@Override
public boolean hasNext() {
return count < numRecords;
}

@Override
public Record next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
++count;
return (Record) TypeUtil.visit(schema, generator);
}
};
}

private static class RandomRecordGenerator extends RandomDataGenerator<Record> {
Expand All @@ -78,6 +104,46 @@ public Record struct(Types.StructType struct, Iterable<Object> fieldResults) {
}
}

private static class DictionaryEncodedGenerator extends RandomRecordGenerator {
DictionaryEncodedGenerator(long seed) {
super(seed);
}

@Override
protected int getMaxEntries() {
// Here we limited the max entries in LIST or MAP to be 3, because we have the mechanism to duplicate
// the keys in RandomDataGenerator#map while the dictionary encoder will generate a string with
// limited values("0","1","2"). It's impossible for us to request the generator to generate more than 3 keys,
// otherwise we will get in a infinite loop in RandomDataGenerator#map.
return 3;
}

@Override
protected Object randomValue(Type.PrimitiveType primitive, Random random) {
return RandomUtil.generateDictionaryEncodablePrimitive(primitive, random);
}
}

private static class FallbackGenerator extends RandomRecordGenerator {
private final long dictionaryEncodedRows;
private long rowCount = 0;

FallbackGenerator(long seed, long numDictionaryEncoded) {
super(seed);
this.dictionaryEncodedRows = numDictionaryEncoded;
}

@Override
protected Object randomValue(Type.PrimitiveType primitive, Random rand) {
this.rowCount += 1;
if (rowCount > dictionaryEncodedRows) {
return RandomUtil.generatePrimitive(primitive, rand);
} else {
return RandomUtil.generateDictionaryEncodablePrimitive(primitive, rand);
}
}
}

public abstract static class RandomDataGenerator<T> extends TypeUtil.CustomOrderSchemaVisitor<Object> {
private final Random random;
private static final int MAX_ENTRIES = 20;
Expand Down
Loading