diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java index 635cedd8fd6a0..1aec45fa3f287 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGenerator.java @@ -22,7 +22,7 @@ public class DataGenerator { private final FieldDataGenerator topLevelGenerator; public DataGenerator(DataGeneratorSpecification specification) { - this.topLevelGenerator = new ObjectFieldDataGenerator(specification, 0); + this.topLevelGenerator = new ObjectFieldDataGenerator(specification); } public void writeMapping(XContentBuilder mapping) throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java index a2eb62a38d2ba..4a0ed074b1411 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSpecification.java @@ -13,13 +13,59 @@ /** * Allows configuring behavior of {@link DataGenerator}. + * @param arbitrary provides arbitrary values used during generation * @param maxFieldCountPerLevel maximum number of fields that an individual object in mapping has. * Applies to subobjects. 
* @param maxObjectDepth maximum depth of nested objects - * @param arbitrary provides arbitrary values used during generation + * @param nestedFieldsLimit how many total nested fields can be present in a produced mapping */ -public record DataGeneratorSpecification(int maxFieldCountPerLevel, int maxObjectDepth, Arbitrary arbitrary) { - public DataGeneratorSpecification() { - this(50, 3, new RandomBasedArbitrary()); +public record DataGeneratorSpecification(Arbitrary arbitrary, int maxFieldCountPerLevel, int maxObjectDepth, int nestedFieldsLimit) { + + public static Builder builder() { + return new Builder(); + } + + public static DataGeneratorSpecification buildDefault() { + return builder().build(); + } + + public static class Builder { + private Arbitrary arbitrary; + private int maxFieldCountPerLevel; + private int maxObjectDepth; + private int nestedFieldsLimit; + + public Builder() { + // Simply sufficiently big numbers to get some permutations + maxFieldCountPerLevel = 50; + maxObjectDepth = 3; + // Default value of index.mapping.nested_fields.limit + nestedFieldsLimit = 50; + arbitrary = new RandomBasedArbitrary(); + } + + public Builder withArbitrary(Arbitrary arbitrary) { + this.arbitrary = arbitrary; + return this; + } + + public Builder withMaxFieldCountPerLevel(int maxFieldCountPerLevel) { + this.maxFieldCountPerLevel = maxFieldCountPerLevel; + return this; + } + + public Builder withMaxObjectDepth(int maxObjectDepth) { + this.maxObjectDepth = maxObjectDepth; + return this; + } + + public Builder withNestedFieldsLimit(int nestedFieldsLimit) { + this.nestedFieldsLimit = nestedFieldsLimit; + return this; + } + + public DataGeneratorSpecification build() { + return new DataGeneratorSpecification(arbitrary, maxFieldCountPerLevel, maxObjectDepth, nestedFieldsLimit); + } } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java 
b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java index 7132a04e7fe40..139994d530f77 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/Arbitrary.java @@ -16,6 +16,8 @@ public interface Arbitrary { boolean generateSubObject(); + boolean generateNestedObject(); + int childFieldCount(int lowerBound, int upperBound); String fieldName(int lengthLowerBound, int lengthUpperBound); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java index 3506fdef797a4..71152191e27f9 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/arbitrary/RandomBasedArbitrary.java @@ -23,6 +23,12 @@ public boolean generateSubObject() { return randomDouble() <= 0.1; } + @Override + public boolean generateNestedObject() { + // Using a static 10% chance, this is just a chosen value that can be tweaked. + return randomDouble() <= 0.1; + } + @Override public int childFieldCount(int lowerBound, int upperBound) { return randomIntBetween(lowerBound, upperBound); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java new file mode 100644 index 0000000000000..b78e1e2dda0d4 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/Context.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; + +class Context { + private final DataGeneratorSpecification specification; + private final int objectDepth; + private final int nestedFieldsCount; + + Context(DataGeneratorSpecification specification) { + this(specification, 0, 0); + } + + private Context(DataGeneratorSpecification specification, int objectDepth, int nestedFieldsCount) { + this.specification = specification; + this.objectDepth = objectDepth; + this.nestedFieldsCount = nestedFieldsCount; + } + + public DataGeneratorSpecification specification() { + return specification; + } + + public Context subObject() { + return new Context(specification, objectDepth + 1, nestedFieldsCount); + } + + public Context nestedObject() { + return new Context(specification, objectDepth + 1, nestedFieldsCount + 1); + } + + public boolean shouldAddObjectField() { + return specification.arbitrary().generateSubObject() && objectDepth < specification.maxObjectDepth(); + } + + public boolean shouldAddNestedField() { + return specification.arbitrary().generateNestedObject() + && objectDepth < specification.maxObjectDepth() + && nestedFieldsCount < specification.nestedFieldsLimit(); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java new file mode 100644 index 0000000000000..cc1ae57b8996c --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/GenericSubObjectFieldDataGenerator.java @@ -0,0 +1,107 @@ +/* + * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; +import org.elasticsearch.logsdb.datageneration.FieldType; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Generic generator for any type of object field (e.g. "object", "nested"). + */ +public class GenericSubObjectFieldDataGenerator { + private final Context context; + + private final List childFields; + + public GenericSubObjectFieldDataGenerator(Context context) { + this.context = context; + + childFields = new ArrayList<>(); + generateChildFields(); + } + + public CheckedConsumer mappingWriter( + CheckedConsumer customMappingParameters + ) { + return b -> { + b.startObject(); + customMappingParameters.accept(b); + + b.startObject("properties"); + for (var childField : childFields) { + b.field(childField.fieldName); + childField.generator.mappingWriter().accept(b); + } + b.endObject(); + + b.endObject(); + }; + } + + public CheckedConsumer fieldValueGenerator() { + return b -> { + b.startObject(); + + for (var childField : childFields) { + b.field(childField.fieldName); + childField.generator.fieldValueGenerator().accept(b); + } + + b.endObject(); + }; + } + + private void generateChildFields() { + var existingFields = new HashSet(); + // no child fields is legal + var childFieldsCount = context.specification().arbitrary().childFieldCount(0, context.specification().maxFieldCountPerLevel()); + + for (int i 
= 0; i < childFieldsCount; i++) { + var fieldName = generateFieldName(existingFields); + + if (context.shouldAddObjectField()) { + childFields.add(new ChildField(fieldName, new ObjectFieldDataGenerator(context.subObject()))); + } else if (context.shouldAddNestedField()) { + childFields.add(new ChildField(fieldName, new NestedFieldDataGenerator(context.nestedObject()))); + } else { + var fieldType = context.specification().arbitrary().fieldType(); + addLeafField(fieldType, fieldName); + } + } + } + + private void addLeafField(FieldType type, String fieldName) { + var generator = switch (type) { + case LONG -> new LongFieldDataGenerator(context.specification().arbitrary()); + case KEYWORD -> new KeywordFieldDataGenerator(context.specification().arbitrary()); + }; + + childFields.add(new ChildField(fieldName, generator)); + } + + private String generateFieldName(Set existingFields) { + var fieldName = context.specification().arbitrary().fieldName(1, 10); + while (existingFields.contains(fieldName)) { + fieldName = context.specification().arbitrary().fieldName(1, 10); + } + existingFields.add(fieldName); + + return fieldName; + } + + private record ChildField(String fieldName, FieldDataGenerator generator) {} +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java new file mode 100644 index 0000000000000..acceb3aebe421 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.logsdb.datageneration.fields; + +import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; + +public class NestedFieldDataGenerator implements FieldDataGenerator { + private final GenericSubObjectFieldDataGenerator delegate; + + public NestedFieldDataGenerator(Context context) { + this.delegate = new GenericSubObjectFieldDataGenerator(context); + } + + @Override + public CheckedConsumer mappingWriter() { + return delegate.mappingWriter(b -> b.field("type", "nested")); + } + + @Override + public CheckedConsumer fieldValueGenerator() { + return delegate.fieldValueGenerator(); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java index c9afdac2484c3..8cbedefe14ae5 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java @@ -11,91 +11,28 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; import org.elasticsearch.logsdb.datageneration.FieldDataGenerator; -import org.elasticsearch.logsdb.datageneration.FieldType; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; public class ObjectFieldDataGenerator implements FieldDataGenerator 
{ - private final DataGeneratorSpecification specification; - private final int depth; + private final GenericSubObjectFieldDataGenerator delegate; - private final List childFields; + public ObjectFieldDataGenerator(DataGeneratorSpecification specification) { + this(new Context(specification)); + } - public ObjectFieldDataGenerator(DataGeneratorSpecification specification, int depth) { - this.specification = specification; - this.depth = depth; - this.childFields = new ArrayList<>(); - generateChildFields(); + ObjectFieldDataGenerator(Context context) { + this.delegate = new GenericSubObjectFieldDataGenerator(context); } @Override public CheckedConsumer mappingWriter() { - return b -> { - b.startObject().startObject("properties"); - - for (var childField : childFields) { - b.field(childField.fieldName); - childField.generator.mappingWriter().accept(b); - } - - b.endObject().endObject(); - }; + return delegate.mappingWriter(b -> {}); } @Override public CheckedConsumer fieldValueGenerator() { - return b -> { - b.startObject(); - - for (var childField : childFields) { - b.field(childField.fieldName); - childField.generator.fieldValueGenerator().accept(b); - } - - b.endObject(); - }; - } - - private void generateChildFields() { - var existingFields = new HashSet(); - // no child fields is legal - var childFieldsCount = specification.arbitrary().childFieldCount(0, specification.maxFieldCountPerLevel()); - - for (int i = 0; i < childFieldsCount; i++) { - var fieldName = generateFieldName(existingFields); - - if (specification.arbitrary().generateSubObject() && depth < specification.maxObjectDepth()) { - childFields.add(new ChildField(fieldName, new ObjectFieldDataGenerator(specification, depth + 1))); - } else { - var fieldType = specification.arbitrary().fieldType(); - addLeafField(fieldType, fieldName); - } - } - } - - private void addLeafField(FieldType type, String fieldName) { - var generator = switch (type) { - case LONG -> new 
LongFieldDataGenerator(specification.arbitrary()); - case KEYWORD -> new KeywordFieldDataGenerator(specification.arbitrary()); - }; - - childFields.add(new ChildField(fieldName, generator)); + return delegate.fieldValueGenerator(); } - - private String generateFieldName(Set existingFields) { - var fieldName = specification.arbitrary().fieldName(1, 10); - while (existingFields.contains(fieldName)) { - fieldName = specification.arbitrary().fieldName(1, 10); - } - existingFields.add(fieldName); - - return fieldName; - } - - private record ChildField(String fieldName, FieldDataGenerator generator) {} } diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java index af2722a1db467..868c8c749ea11 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorSnapshotTests.java @@ -16,7 +16,13 @@ public class DataGeneratorSnapshotTests extends ESTestCase { public void testSnapshot() throws Exception { - var dataGenerator = new DataGenerator(new DataGeneratorSpecification(5, 2, new TestArbitrary())); + var dataGenerator = new DataGenerator( + DataGeneratorSpecification.builder() + .withArbitrary(new TestArbitrary()) + .withMaxFieldCountPerLevel(5) + .withMaxObjectDepth(2) + .build() + ); var mapping = XContentBuilder.builder(XContentType.JSON.xContent()).prettyPrint(); dataGenerator.writeMapping(mapping); @@ -31,22 +37,45 @@ public void testSnapshot() throws Exception { "f1" : { "properties" : { "f2" : { - "type" : "keyword" - }, - "f3" : { "properties" : { + "f3" : { + "type" : "keyword" + }, "f4" : { "type" : "long" - }, - "f5" : { + } + } + }, + "f5" : { + "properties" : { + "f6" : { "type" : "keyword" + }, + "f7" : { + "type" : "long" } } } } }, - "f6" : { - "type" : "long" + 
"f8" : { + "type" : "nested", + "properties" : { + "f9" : { + "type" : "nested", + "properties" : { + "f10" : { + "type" : "keyword" + }, + "f11" : { + "type" : "long" + } + } + }, + "f12" : { + "type" : "keyword" + } + } } } } @@ -55,13 +84,22 @@ public void testSnapshot() throws Exception { var expectedDocument = """ { "f1" : { - "f2" : "string1", - "f3" : { - "f4" : 0, - "f5" : "string2" + "f2" : { + "f3" : "string1", + "f4" : 0 + }, + "f5" : { + "f6" : "string2", + "f7" : 1 } }, - "f6" : 1 + "f8" : { + "f9" : { + "f10" : "string3", + "f11" : 2 + }, + "f12" : "string4" + } }"""; assertEquals(expectedMapping, Strings.toString(mapping)); @@ -69,7 +107,6 @@ public void testSnapshot() throws Exception { } private class TestArbitrary implements Arbitrary { - private boolean generateSubObject = true; private int generatedFields = 0; private FieldType fieldType = FieldType.KEYWORD; private long longValue = 0; @@ -77,13 +114,12 @@ private class TestArbitrary implements Arbitrary { @Override public boolean generateSubObject() { - if (generateSubObject) { - generateSubObject = false; - return true; - } + return generatedFields < 6; + } - generateSubObject = true; - return false; + @Override + public boolean generateNestedObject() { + return generatedFields > 6 && generatedFields < 12; } @Override diff --git a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java index d829ee1dc0b12..cd8b2424ac5ae 100644 --- a/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java +++ b/test/framework/src/test/java/org/elasticsearch/logsdb/datageneration/DataGeneratorTests.java @@ -12,7 +12,6 @@ import org.elasticsearch.index.mapper.MapperServiceTestCase; import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.logsdb.datageneration.arbitrary.Arbitrary; -import 
org.elasticsearch.logsdb.datageneration.arbitrary.RandomBasedArbitrary; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; @@ -21,7 +20,7 @@ public class DataGeneratorTests extends ESTestCase { public void testDataGeneratorSanity() throws IOException { - var dataGenerator = new DataGenerator(new DataGeneratorSpecification()); + var dataGenerator = new DataGenerator(DataGeneratorSpecification.buildDefault()); var mapping = XContentBuilder.builder(XContentType.JSON.xContent()); dataGenerator.writeMapping(mapping); @@ -33,8 +32,60 @@ public void testDataGeneratorSanity() throws IOException { } public void testDataGeneratorProducesValidMappingAndDocument() throws IOException { - // Let's keep number of fields under 1000 field limit - var dataGenerator = new DataGenerator(new DataGeneratorSpecification(10, 3, new RandomBasedArbitrary())); + // Make sure objects, nested objects and all field types are covered. + var testArbitrary = new Arbitrary() { + private boolean subObjectCovered = false; + private boolean nestedCovered = false; + private int generatedFields = 0; + + @Override + public boolean generateSubObject() { + if (subObjectCovered == false) { + subObjectCovered = true; + return true; + } + + return false; + } + + @Override + public boolean generateNestedObject() { + if (nestedCovered == false) { + nestedCovered = true; + return true; + } + + return false; + } + + @Override + public int childFieldCount(int lowerBound, int upperBound) { + // Make sure to generate enough fields to go through all field types. 
+ return 20; + } + + @Override + public String fieldName(int lengthLowerBound, int lengthUpperBound) { + return "f" + generatedFields++; + } + + @Override + public FieldType fieldType() { + return FieldType.values()[generatedFields % FieldType.values().length]; + } + + @Override + public long longValue() { + return randomLong(); + } + + @Override + public String stringValue(int lengthLowerBound, int lengthUpperBound) { + return randomAlphaOfLengthBetween(lengthLowerBound, lengthUpperBound); + } + }; + + var dataGenerator = new DataGenerator(DataGeneratorSpecification.builder().withArbitrary(testArbitrary).build()); var mapping = XContentBuilder.builder(XContentType.JSON.xContent()); dataGenerator.writeMapping(mapping); @@ -49,7 +100,7 @@ public void testDataGeneratorProducesValidMappingAndDocument() throws IOExceptio } public void testDataGeneratorStressTest() throws IOException { - // Let's generate 1000000 fields to test an extreme case (2 levels of nested objects + 1 leaf level with 100 fields per object). + // Let's generate 1000000 fields to test an extreme case (2 levels of objects + 1 leaf level with 100 fields per object). var arbitrary = new Arbitrary() { private int generatedFields = 0; @@ -58,6 +109,11 @@ public boolean generateSubObject() { return true; } + @Override + public boolean generateNestedObject() { + return false; + } + @Override public int childFieldCount(int lowerBound, int upperBound) { return upperBound; @@ -83,7 +139,9 @@ public String stringValue(int lengthLowerBound, int lengthUpperBound) { return ""; } }; - var dataGenerator = new DataGenerator(new DataGeneratorSpecification(100, 2, arbitrary)); + var dataGenerator = new DataGenerator( + DataGeneratorSpecification.builder().withArbitrary(arbitrary).withMaxFieldCountPerLevel(100).withMaxObjectDepth(2).build() + ); var mapping = XContentBuilder.builder(XContentType.JSON.xContent()); dataGenerator.writeMapping(mapping);