From b69f32c6fc33f204825d18168cb15017da77be72 Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Fri, 23 May 2025 16:57:51 -0700 Subject: [PATCH 1/5] A POC for better JSON performance Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../impl/generators/TestGenerator.java | 11 +- .../generators/json/JsonCodecGenerator.java | 1 + .../json/JsonCodecParseMethodGenerator.java | 180 ++--- .../json/JsonCodecWriteMethodGenerator.java | 100 +-- .../com/hedera/pbj/runtime/JsonCodec.java | 45 +- .../com/hedera/pbj/runtime/JsonTools.java | 427 ++++-------- .../runtime/io/WritableSequentialData.java | 20 +- .../io/buffer/ByteArrayBufferedData.java | 51 ++ .../hedera/pbj/runtime/json/JsonLexer.java | 635 ++++++++++++++++++ .../CharBufferToWritableSequentialData.java | 2 +- .../src/main/java/module-info.java | 1 + .../io/buffer/BufferedDataTestBase.java | 8 +- .../pbj/runtime/json/JsonLexerTest.java | 355 ++++++++++ pbj-integration-tests/build.gradle.kts | 10 +- .../integration/jmh/BufferedDataBench.java | 71 ++ .../hedera/pbj/integration/jmh/JsonBench.java | 20 +- 16 files changed, 1482 insertions(+), 455 deletions(-) create mode 100644 pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java create mode 100644 pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/BufferedDataBench.java diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java index 737f35b8..16ad7775 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java @@ -386,8 +386,17 @@ private static String generateTestMethod(final String modelClassName, final Stri assertEquals(charBuffer2, charBuffer); // Test JSON Reading - final $modelClassName jsonReadPbj = $modelClassName.JSON.parse(JsonTools.parseJson(charBuffer), false, Integer.MAX_VALUE); + String jsonString = charBuffer.toString(); + System.out.println("jsonString: " + jsonString); + System.out.flush(); + try{ + final $modelClassName jsonReadPbj = $modelClassName.JSON.parse(BufferedData.wrap(jsonString.getBytes( + StandardCharsets.UTF_8)), false, Integer.MAX_VALUE); assertEquals(modelObj, jsonReadPbj); + } catch (Exception e) { + System.err.println("JSON read: " + jsonString); + throw e; + } } @SuppressWarnings("EqualsWithItself") diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java index 2e6d5d7d..15518651 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java @@ -60,6 +60,7 @@ public void generate( writer.addImport("com.hedera.pbj.runtime.*"); writer.addImport("com.hedera.pbj.runtime.io.*"); writer.addImport("com.hedera.pbj.runtime.io.buffer.*"); + writer.addImport("com.hedera.pbj.runtime.json.JsonLexer"); writer.addImport("java.io.IOException"); writer.addImport("java.nio.*"); writer.addImport("java.nio.charset.*"); diff --git 
a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecParseMethodGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecParseMethodGenerator.java index a545a1db..25e0acd2 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecParseMethodGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecParseMethodGenerator.java @@ -45,41 +45,42 @@ static String generateUnsetOneOfConstants(final List fields) { static String generateParseObjectMethod(final String modelClassName, final List fields) { return """ - /** - * Parses a HashObject object from JSON parse tree for object JSONParser.ObjContext. - * Throws an UnknownFieldException wrapped in a ParseException if in strict mode ONLY. - * - * @param root The JSON parsed object tree to parse data from - * @return Parsed HashObject model object or null if data input was null or empty - * @throws ParseException If parsing fails - */ + /** {@inheritDoc} */ public @NonNull $modelClassName parse( - @Nullable final JSONParser.ObjContext root, + @Nullable final JsonLexer lexer, final boolean strictMode, - final int maxDepth) throws ParseException { - if (maxDepth < 0) { - throw new ParseException("Reached maximum allowed depth of nested messages"); - } + final boolean parseUnknownFields, + final int maxDepth) + throws ParseException { try { // -- TEMP STATE FIELDS -------------------------------------- $fieldDefs - - // -- EXTRACT VALUES FROM PARSE TREE --------------------------------------------- - - for (JSONParser.PairContext kvPair : root.pair()) { - switch (kvPair.STRING().getText()) { - $caseStatements + // start parsing + lexer.openObject(); + boolean isFirst = true; + while(true) { + if (isFirst) { + isFirst = false; + } else if(!lexer.nextFieldOrClose()){ + break; + } + // read field name and colon + final String fieldName = lexer.readString(); + if (fieldName == null) break;// there are no fields or no more fields + lexer.consumeColon(); + // read and handle field value + switch (fieldName) { + $caseStatements default: { if (strictMode) { // Since we are parsing is strict mode, this is an exceptional condition. 
- throw new UnknownFieldException(kvPair.STRING().getText()); + throw new UnknownFieldException(fieldName); } } } } - return new $modelClassName($fieldsList); - } catch (Exception ex) { + } catch (IOException ex) { throw new ParseException(ex); } } @@ -94,7 +95,7 @@ static String generateParseObjectMethod(final String modelClassName, final List< .replace( "$fieldsList", fields.stream().map(field -> "temp_" + field.name()).collect(Collectors.joining(", "))) - .replace("$caseStatements", generateCaseStatements(fields)) + .replace("$caseStatements", generateCaseStatements(fields).indent(DEFAULT_INDENT * 4)) .indent(DEFAULT_INDENT); } @@ -108,98 +109,103 @@ static String generateParseObjectMethod(final String modelClassName, final List< private static String generateCaseStatements(final List fields) { StringBuilder sb = new StringBuilder(); for (Field field : fields) { - if (field instanceof final OneOfField oneOfField) { + if (field.repeated()) { + sb.append("case \"" + toJsonFieldName(field.name()) + "\" /* [" + field.fieldNumber() + "] */ : {\n"); + sb.append(""" + lexer.openArray(); + boolean isFirst2 = true; + temp_$fieldName = new ArrayList<>(); + while(true) { + if (isFirst2) { + isFirst2 = false; + } else if(!lexer.nextFieldOrClose()){ + break; + } + // read value + temp_$fieldName.add($fieldValueCode); + } + break; + } + """.replace("$fieldName", field.name()) + .replace("$fieldValueCode", generateFieldCaseStatement(field))); + } else if (field instanceof final OneOfField oneOfField) { for (final Field subField : oneOfField.fields()) { sb.append("case \"" + toJsonFieldName(subField.name()) + "\" /* [" + subField.fieldNumber() + "] */ " + ": temp_" + oneOfField.name() + " = new %s<>(\n".formatted(oneOfField.className()) + oneOfField.getEnumClassRef().indent(DEFAULT_INDENT) + "." + Common.camelToUpperSnake(subField.name()) + ", \n".indent(DEFAULT_INDENT)); - generateFieldCaseStatement(sb, subField, "kvPair.value()"); + sb.append(generateFieldCaseStatement(subField)); sb.append("); break;\n"); } + } else if (field.type() == Field.FieldType.MAP) { + final MapField mapField = (MapField) field; + sb.append("case \"" + toJsonFieldName(field.name()) + "\" /* [" + field.fieldNumber() + "] */ : {\n"); + sb.append(""" + lexer.openObject(); + boolean isFirst2 = true; + temp_$fieldName = new HashMap<>(); + while(true) { + if (isFirst2) { + isFirst2 = false; + } else if(!lexer.nextFieldOrClose()){ + break; + } + // read value + var key = $fieldKeyCode; + lexer.consumeColon(); + var value = $fieldValueCode; + temp_$fieldName.put(key, value); + } + break; + } + """.replace("$fieldName", field.name()) + .replace("$fieldKeyCode", generateFieldCaseStatement(mapField.keyField())) + .replace("$fieldValueCode", generateFieldCaseStatement(mapField.valueField()))); } else { sb.append("case \"" + toJsonFieldName(field.name()) + "\" /* [" + field.fieldNumber() + "] */ " + ": temp_" + field.name() + " = "); - generateFieldCaseStatement(sb, field, "kvPair.value()"); + sb.append(generateFieldCaseStatement(field)); sb.append("; break;\n"); } } - return sb.toString(); + return sb.toString().indent(DEFAULT_INDENT*2); } /** * Generate switch case statement for a field. * - * @param field field to generate case statement for - * @param origSB StringBuilder to append code to - * @param valueGetter normally a "kvPair.value()", but may be different e.g. 
for maps parsing + * @param field field to generate case statement for + * @return string of case statement code */ - private static void generateFieldCaseStatement( - final StringBuilder origSB, final Field field, final String valueGetter) { + private static String generateFieldCaseStatement(final Field field) { final StringBuilder sb = new StringBuilder(); - if (field.repeated()) { - if (field.type() == Field.FieldType.MESSAGE) { - sb.append("parseObjArray($valueGetter.arr(), " + field.messageType() + ".JSON, maxDepth - 1)"); - } else { - sb.append("$valueGetter.arr().value().stream().map(v -> "); - switch (field.type()) { - case ENUM -> sb.append(field.messageType() + ".fromString(v.STRING().getText())"); - case INT32, UINT32, SINT32, FIXED32, SFIXED32 -> sb.append("parseInteger(v)"); - case INT64, UINT64, SINT64, FIXED64, SFIXED64 -> sb.append("parseLong(v)"); - case FLOAT -> sb.append("parseFloat(v)"); - case DOUBLE -> sb.append("parseDouble(v)"); - case STRING -> sb.append("unescape(v.STRING().getText())"); - case BOOL -> sb.append("parseBoolean(v)"); - case BYTES -> sb.append("Bytes.fromBase64(v.STRING().getText())"); - default -> throw new RuntimeException("Unknown field type [" + field.type() + "]"); - } - sb.append(").toList()"); - } - } else if (field.optionalValueType()) { + if (field.optionalValueType()) { switch (field.messageType()) { - case "Int32Value", "UInt32Value" -> sb.append("parseInteger($valueGetter)"); - case "Int64Value", "UInt64Value" -> sb.append("parseLong($valueGetter)"); - case "FloatValue" -> sb.append("parseFloat($valueGetter)"); - case "DoubleValue" -> sb.append("parseDouble($valueGetter)"); - case "StringValue" -> sb.append("unescape($valueGetter.STRING().getText())"); - case "BoolValue" -> sb.append("parseBoolean($valueGetter)"); - case "BytesValue" -> sb.append("Bytes.fromBase64($valueGetter.STRING().getText())"); + case "Int32Value", "UInt32Value" -> sb.append("(int)lexer.readSignedInteger()"); + case "Int64Value", "UInt64Value" -> sb.append("lexer.readSignedInteger()"); + case "FloatValue" -> sb.append("(float)lexer.readDouble()"); + case "DoubleValue" -> sb.append("lexer.readDouble()"); + case "StringValue" -> sb.append("lexer.readString()"); + case "BoolValue" -> sb.append("lexer.readBoolean()"); + case "BytesValue" -> sb.append("lexer.readBytes()"); default -> throw new RuntimeException("Unknown message type [" + field.messageType() + "]"); } - } else if (field.type() == Field.FieldType.MAP) { - final MapField mapField = (MapField) field; - - final StringBuilder keySB = new StringBuilder(); - final StringBuilder valueSB = new StringBuilder(); - - generateFieldCaseStatement(keySB, mapField.keyField(), "mapKV"); - generateFieldCaseStatement(valueSB, mapField.valueField(), "mapKV.value()"); - - sb.append( - """ - $valueGetter.getChild(JSONParser.ObjContext.class, 0).pair().stream() - .collect(Collectors.toMap( - mapKV -> $mapEntryKey, - new UncheckedThrowingFunction<>(mapKV -> $mapEntryValue) - ))""" - .replace("$mapEntryKey", keySB.toString()) - .replace("$mapEntryValue", valueSB.toString())); } else { switch (field.type()) { - case MESSAGE -> sb.append(field.javaFieldType() - + ".JSON.parse($valueGetter.getChild(JSONParser.ObjContext.class, 0), false, maxDepth - 1)"); - case ENUM -> sb.append(field.javaFieldType() + ".fromString($valueGetter.STRING().getText())"); - case INT32, UINT32, SINT32, FIXED32, SFIXED32 -> sb.append("parseInteger($valueGetter)"); - case INT64, UINT64, SINT64, FIXED64, SFIXED64 -> sb.append("parseLong($valueGetter)"); - case 
FLOAT -> sb.append("parseFloat($valueGetter)"); - case DOUBLE -> sb.append("parseDouble($valueGetter)"); - case STRING -> sb.append("unescape($valueGetter.STRING().getText())"); - case BOOL -> sb.append("parseBoolean($valueGetter)"); - case BYTES -> sb.append("Bytes.fromBase64($valueGetter.STRING().getText())"); + case MESSAGE -> sb.append(field.javaFieldTypeBase() + + ".JSON.parse(lexer, strictMode, parseUnknownFields, maxDepth - 1)"); + case ENUM -> sb.append("lexer.readEnum("+field.javaFieldTypeBase()+".class)"); + case INT32, UINT32, SINT32, FIXED32, SFIXED32 -> sb.append("(int)lexer.readSignedInteger()"); + case INT64, UINT64, SINT64, FIXED64, SFIXED64 -> sb.append("lexer.readSignedInteger()"); + case FLOAT -> sb.append("(float)lexer.readDouble()"); + case DOUBLE -> sb.append("lexer.readDouble()"); + case STRING -> sb.append("lexer.readString()"); + case BOOL -> sb.append("lexer.readBoolean()"); + case BYTES -> sb.append("lexer.readBytes()"); default -> throw new RuntimeException("Unknown field type [" + field.type() + "]"); } } - origSB.append(sb.toString().replace("$valueGetter", valueGetter)); + return sb.toString(); } } diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java index a90a126a..62a386fc 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java @@ -18,7 +18,7 @@ /** * Code to generate the write method for Codec classes. */ -@SuppressWarnings("SwitchStatementWithTooFewBranches") +@SuppressWarnings({"SwitchStatementWithTooFewBranches", "StringConcatenationInsideStringBufferAppend"}) final class JsonCodecWriteMethodGenerator { static String generateWriteMethod(final String modelClassName, final List fields) { @@ -33,34 +33,34 @@ static String generateWriteMethod(final String modelClassName, final List field, modelClassName, "data.%s()".formatted(field.nameCamelFirstLower()))) .collect(Collectors.joining("\n")) .indent(DEFAULT_INDENT); - return """ /** - * Returns JSON string representing an item. - * - * @param data The item to convert. Must not be null. - * @param indent The indent to use for pretty printing - * @param inline When true the output will start with indent end with a new line otherwise - * it will just be the object "{...}" + * {@inheritDoc} */ @Override - public String toJSON(@NonNull $modelClass data, String indent, boolean inline) { - StringBuilder sb = new StringBuilder(); + public void write(@NonNull $modelClass data, @NonNull WritableSequentialData out, int initialIndent, int indentStep, boolean inline) { + final byte[] indentBytes = new byte[initialIndent]; + final byte[] childIndentBytes = new byte[initialIndent+indentStep]; + Arrays.fill(indentBytes, SPACE); + Arrays.fill(childIndentBytes, SPACE); // start - sb.append(inline ? 
"{\\n" : indent + "{\\n"); - final String childIndent = indent + INDENT; - // collect field lines - final List fieldLines = new ArrayList<>(); - $fieldWriteLines - // write field lines - if (!fieldLines.isEmpty()){ - sb.append(childIndent); - sb.append(String.join(",\\n"+childIndent, fieldLines)); - sb.append("\\n"); + if (inline) { + out.writeByte(OPEN_OBJECT); + } else { + out.writeBytes(indentBytes); + out.writeByte2(OPEN_OBJECT, NL); } + // write field lines + boolean isFirstField = true; + $fieldWriteLines // end - sb.append(indent + "}"); - return sb.toString(); + if (inline) { + out.writeByte(CLOSE_OBJECT); + } else { + out.writeByte(NL); + out.writeBytes(indentBytes); + out.writeByte(CLOSE_OBJECT); + } } """ .replace("$modelClass", modelClassName) @@ -79,33 +79,35 @@ public String toJSON(@NonNull $modelClass data, String indent, boolean inline) { private static String generateFieldWriteLines(final Field field, final String modelClassName, String getValueCode) { final String fieldDef = Common.camelToUpperSnake(field.name()); final String fieldName = '\"' + toJsonFieldName(field.name()) + '\"'; - final String basicFieldCode = generateBasicFieldLines(field, getValueCode, fieldDef, fieldName, "childIndent"); - String prefix = "// [" + field.fieldNumber() + "] - " + field.name() + "\n"; + final String basicFieldCode = generateBasicFieldLines(field, getValueCode, fieldDef, fieldName, "initialIndent+indentStep+indentStep"); // todo replace indent*2 with childIndentBytes + StringBuilder sb = new StringBuilder(); + sb.append("// [" + field.fieldNumber() + "] - " + field.name() + "\n"); if (field.parent() != null) { final OneOfField oneOfField = field.parent(); final String oneOfType = modelClassName + "." + oneOfField.nameCamelFirstUpper() + "OneOfType"; - prefix += "if (data." + oneOfField.nameCamelFirstLower() + "().kind() == " + oneOfType + "." - + Common.camelToUpperSnake(field.name()) + ")"; - prefix += "\n"; - return prefix + "fieldLines.add(" + basicFieldCode + ");"; + sb.append( "if (data." + oneOfField.nameCamelFirstLower() + "().kind() == " + oneOfType + "." + + Common.camelToUpperSnake(field.name()) + ") {\n"); } else { if (field.repeated()) { - return prefix + "if (!data." + field.nameCamelFirstLower() + "().isEmpty()) fieldLines.add(" - + basicFieldCode + ");"; + sb.append("if (!data." + field.nameCamelFirstLower() + "().isEmpty()) {\n"); } else if (field.type() == Field.FieldType.BYTES) { - return prefix + "if (data." + field.nameCamelFirstLower() + "() != " + field.javaDefault() + " && data." + sb.append("if (data." + field.nameCamelFirstLower() + "() != " + field.javaDefault() + " && data." + field.nameCamelFirstLower() + "() != null" + " && data." - + field.nameCamelFirstLower() + "().length() > 0) fieldLines.add(" + basicFieldCode + ");"; + + field.nameCamelFirstLower() + "().length() > 0) {\n"); } else if (field.type() == Field.FieldType.MAP) { - return prefix + "if (data." + field.nameCamelFirstLower() + "() != " + field.javaDefault() - + " && !data." + field.nameCamelFirstLower() + "().isEmpty()) fieldLines.add(" + basicFieldCode - + ");"; + sb.append("if (data." + field.nameCamelFirstLower() + "() != " + field.javaDefault() + + " && !data." + field.nameCamelFirstLower() + "().isEmpty()) {\n"); } else { - return prefix + "if (data." + field.nameCamelFirstLower() + "() != " + field.javaDefault() - + ") fieldLines.add(" + basicFieldCode + ");"; + sb.append("if (data." 
+ field.nameCamelFirstLower() + "() != " + field.javaDefault() + + ") {\n"); } } + sb.append(" if (isFirstField) { isFirstField = false; } else { out.writeByte2(COMMA, NL); }\n"); + sb.append(" out.writeBytes(childIndentBytes);\n"); + sb.append(" "+ basicFieldCode + ";\n"); + sb.append("}"); + return sb.toString(); } @NonNull @@ -119,14 +121,15 @@ private static String generateBasicFieldLines( "UInt32Value", "FloatValue", "DoubleValue", - "BytesValue" -> "field(%s, %s)".formatted(fieldName, getValueCode); - case "Int64Value", "UInt64Value" -> "field(%s, %s, true)".formatted(fieldName, getValueCode); + "BytesValue" -> "field(out, %s, %s)".formatted(fieldName, getValueCode); + case "Int64Value", "UInt64Value" -> "field(out, %s, %s, true)".formatted(fieldName, getValueCode); default -> throw new UnsupportedOperationException( "Unhandled optional message type:" + field.messageType()); }; } else if (field.repeated()) { return switch (field.type()) { - case MESSAGE -> "arrayField(childIndent, $fieldName, $codec, $valueCode)" + case MESSAGE -> "arrayField(out, $indent, $fieldName, $codec, $valueCode)" + .replace("$indent", childIndent) .replace("$fieldName", fieldName) .replace("$fieldDef", fieldDef) .replace("$valueCode", getValueCode) @@ -134,7 +137,7 @@ private static String generateBasicFieldLines( "$codec", ((SingleField) field).messageTypeModelPackage() + "." + ((SingleField) field).completeClassName() + ".JSON"); - default -> "arrayField($fieldName, $fieldDef, $valueCode)" + default -> "arrayField(out, $fieldName, $fieldDef, $valueCode)" .replace("$fieldName", fieldName) .replace("$fieldDef", fieldDef) .replace("$valueCode", getValueCode); @@ -146,21 +149,22 @@ private static String generateBasicFieldLines( "v", Common.camelToUpperSnake(mapField.valueField().name()), "n", - "indent"); - return "field(%s, %s, $kEncoder, $vComposer)" + childIndent); +// return "field(out, %s, %s, $kEncoder, $vComposer)" + return "field(out, %s, %s, $vComposer)" .formatted(fieldName, getValueCode) // Maps in protobuf can only have simple scalar and not floating keys, so toString should do a good // job. // Also see https://protobuf.dev/programming-guides/proto3/#json - .replace("$kEncoder", "k -> escape(k.toString())") - .replace("$vComposer", "(n, v) -> " + vComposerMethod); +// .replace("$kEncoder", "k -> escape(k.toString())") + .replace("$vComposer", "(o, n, v) -> " + vComposerMethod.replaceAll("out","o")); } else { return switch (field.type()) { - case ENUM -> "field($fieldName, $valueCode.protoName())" + case ENUM -> "field(out, $fieldName, $valueCode.protoName())" .replace("$fieldName", fieldName) .replace("$fieldDef", fieldDef) .replace("$valueCode", getValueCode); - case MESSAGE -> "field($childIndent, $fieldName, $codec, $valueCode)" + case MESSAGE -> "field(out, $childIndent, $fieldName, $codec, $valueCode)" .replace("$childIndent", childIndent) .replace("$fieldName", fieldName) .replace("$fieldDef", fieldDef) @@ -169,7 +173,7 @@ private static String generateBasicFieldLines( "$codec", ((SingleField) field).messageTypeModelPackage() + "." 
+ ((SingleField) field).completeClassName() + ".JSON"); - default -> "field(%s, %s)".formatted(fieldName, getValueCode); + default -> "field(out, %s, %s)".formatted(fieldName, getValueCode); }; } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonCodec.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonCodec.java index 937c56b2..99f9d3be 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonCodec.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonCodec.java @@ -4,11 +4,12 @@ import com.hedera.pbj.runtime.io.ReadableSequentialData; import com.hedera.pbj.runtime.io.WritableSequentialData; import com.hedera.pbj.runtime.io.stream.WritableStreamingData; -import com.hedera.pbj.runtime.jsonparser.JSONParser; +import com.hedera.pbj.runtime.json.JsonLexer; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Objects; /** @@ -28,22 +29,25 @@ public interface JsonCodec extends Codec { final boolean parseUnknownFields, final int maxDepth) throws ParseException { - try { - return parse(JsonTools.parseJson(input), strictMode, maxDepth); - } catch (IOException ex) { - throw new ParseException(ex); - } + return parse(new JsonLexer(input), strictMode, parseUnknownFields, maxDepth); } /** * Parses a HashObject object from JSON parse tree for object JSONParser.ObjContext. Throws if in strict mode ONLY. * - * @param root The JSON parsed object tree to parse data from + * @param lexer The JSON lexer to parse with + * @param strictMode when {@code true}, the parser errors out on unknown fields; otherwise they'll be simply skipped. + * @param parseUnknownFields when {@code true} and strictMode is {@code false}, the parser will collect unknown + * fields in the unknownFields list in the model; otherwise they'll be simply skipped. + * @param maxDepth a ParseException will be thrown if the depth of nested messages exceeds the maxDepth value. * @return Parsed HashObject model object or null if data input was null or empty * @throws ParseException If parsing fails */ @NonNull - T parse(@Nullable final JSONParser.ObjContext root, final boolean strictMode, final int maxDepth) + T parse(@Nullable final JsonLexer lexer, + final boolean strictMode, + final boolean parseUnknownFields, + final int maxDepth) throws ParseException; /** @@ -54,9 +58,21 @@ T parse(@Nullable final JSONParser.ObjContext root, final boolean strictMode, fi * @throws IOException If the {@link WritableSequentialData} cannot be written to. */ default void write(@NonNull T item, @NonNull WritableSequentialData output) throws IOException { - output.writeUTF8(toJSON(item)); + write(item, output, 0, 2, false); } + /** + * Writes JSON representing an item in UTF8 to output. + * + * @param item The item to convert. Must not be null. + * @param out The output to write to. Must not be null. + * @param initialIndent The indent num of spaces to use for pretty printing from the first line + * @param indentStep The indent num of spaces to add for each nested object + * @param inline When true, the output will start with indent end with a new lines, otherwise + * it will just be the object "{...}" + */ + void write(@NonNull T item, @NonNull WritableSequentialData out, int initialIndent, int indentStep, boolean inline); + /** * Returns JSON string representing an item. 
* @@ -70,11 +86,16 @@ default String toJSON(@NonNull T item) { * Returns JSON string representing an item. * * @param item The item to convert. Must not be null. - * @param indent The indent to use for pretty printing - * @param inline When true the output will start with indent end with a new line otherwise + * @param indent The indent to use for pretty printing, only supports spaces + * @param inline When true, the output will start with indent end with a new lines, otherwise * it will just be the object "{...}" */ - String toJSON(@NonNull T item, String indent, boolean inline); + default String toJSON(@NonNull T item, String indent, boolean inline) { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + WritableStreamingData out = new WritableStreamingData(bout); + write(item, out, 0, indent.length(), inline); + return bout.toString(StandardCharsets.UTF_8); + } /** * Reads from this data input the length of the data within the input. The implementation may diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java index 80590353..33917249 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java @@ -1,32 +1,20 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.pbj.runtime; -import com.hedera.pbj.runtime.io.ReadableSequentialData; +import com.hedera.pbj.runtime.io.WritableSequentialData; import com.hedera.pbj.runtime.io.buffer.Bytes; -import com.hedera.pbj.runtime.jsonparser.JSONLexer; -import com.hedera.pbj.runtime.jsonparser.JSONParser; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; -import java.io.IOException; -import java.nio.CharBuffer; import java.util.Base64; import java.util.List; import java.util.Map; -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.stream.Collectors; -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CodePointBuffer; -import org.antlr.v4.runtime.CodePointCharStream; -import org.antlr.v4.runtime.CommonTokenStream; /** * Class of static utility methods for working with JSON. All generated JSON is designed to be * 100% identical to that generated by Google Protobuf library. */ +@SuppressWarnings("ForLoopReplaceableByForEach") public final class JsonTools { - /** The indent spacing to use for pretty printing JSON */ - public static final String INDENT = " "; // ==================================================================================================== // Conversion Utility Methods @@ -73,222 +61,70 @@ public static String unescape(@Nullable String string) { return string.replaceAll("\\\\n", "\n").replaceAll("\\\\r", "\r"); } - /** - * Escape a string by replacing new lines with \n or \r. 
- * - * @param string the string to escape - * @return the escaped string, null if input string is null - */ - public static String escape(@Nullable String string) { - if (string == null) return null; - return string.replaceAll("\n", "\\\\n").replaceAll("\r", "\\\\r"); - } + public static final byte OPEN_OBJECT = '{'; + public static final byte CLOSE_OBJECT = '}'; + public static final byte OPEN_ARRAY = '['; + public static final byte CLOSE_ARRAY = ']'; + public static final byte NL = '\n'; + public static final byte SPACE = ' '; + public static final byte COLON = ':'; + public static final byte COMMA = ','; + public static final byte n = 'n'; + public static final byte u = 'u'; + public static final byte l = 'l'; // ==================================================================================================== - // Parse Methods - - /** - * Parse a JSON string in a ReadableSequentialData into a JSON object. - * - * @param input the ReadableSequentialData containing the JSON string - * @return the Antlr JSON context object - * @throws IOException if there was a problem parsing the JSON - */ - public static JSONParser.ObjContext parseJson(@NonNull final ReadableSequentialData input) throws IOException { - final JSONLexer lexer = new JSONLexer(CharStreams.fromStream(input.asInputStream())); - final JSONParser parser = new JSONParser(new CommonTokenStream(lexer)); - final JSONParser.JsonContext jsonContext = parser.json(); - final JSONParser.ValueContext valueContext = jsonContext.value(); - return valueContext.obj(); - } - - /** - * Parse a JSON string in a CharBuffer into a JSON object. - * - * @param input the CharBuffer containing the JSON string - * @return the Antlr JSON context object - */ - public static JSONParser.ObjContext parseJson(@NonNull final CharBuffer input) { - CodePointBuffer.Builder codePointBufferBuilder = CodePointBuffer.builder(input.remaining()); - codePointBufferBuilder.append(input); - final JSONLexer lexer = - new JSONLexer(CodePointCharStream.fromBuffer(codePointBufferBuilder.build(), "CharBuffer")); - final JSONParser parser = new JSONParser(new CommonTokenStream(lexer)); - final JSONParser.JsonContext jsonContext = parser.json(); - final JSONParser.ValueContext valueContext = jsonContext.value(); - return valueContext.obj(); - } - - /** - * Parse a JSON Object array from a JSONParser.ArrContext into a list of objects. - * - * @param arrContext the JSONParser.ArrContext to parse - * @param codec the JsonCodec to use to parse the objects - * @return the list of parsed objects - * @param the type of the objects to parse - */ - public static List parseObjArray(JSONParser.ArrContext arrContext, JsonCodec codec, final int maxDepth) { - return arrContext.value().stream() - .map(v -> { - try { - return codec.parse(v.obj(), false, maxDepth - 1); - } catch (ParseException e) { - throw new UncheckedParseException(e); - } - }) - .toList(); - } - - /** - * Parse an integer from a JSONParser.ValueContext - * - * @param valueContext the JSONParser.ValueContext to parse - * @return the parsed integer - */ - public static int parseInteger(JSONParser.ValueContext valueContext) { - return Integer.parseInt( - valueContext.STRING() != null - ? 
valueContext.STRING().getText() - : valueContext.NUMBER().getText()); - } - - /** - * Parse a long from a JSONParser.ValueContext - * - * @param valueContext the JSONParser.ValueContext to parse - * @return the parsed long - */ - public static long parseLong(JSONParser.ValueContext valueContext) { - return Long.parseLong( - valueContext.STRING() != null - ? valueContext.STRING().getText() - : valueContext.NUMBER().getText()); - } - - /** - * Parse a float from a JSONParser.ValueContext - * - * @param valueContext the JSONParser.ValueContext to parse - * @return the parsed float - */ - public static float parseFloat(JSONParser.ValueContext valueContext) { - return Float.parseFloat( - valueContext.STRING() != null - ? valueContext.STRING().getText() - : valueContext.NUMBER().getText()); - } - - /** - * Parse a double from a JSONParser.ValueContext - * - * @param valueContext the JSONParser.ValueContext to parse - * @return the parsed double - */ - public static double parseDouble(JSONParser.ValueContext valueContext) { - return Double.parseDouble( - valueContext.STRING() != null - ? valueContext.STRING().getText() - : valueContext.NUMBER().getText()); - } - - /** - * Parse a boolean from a JSONParser.ValueContext - * - * @param valueContext the JSONParser.ValueContext to parse - * @return the parsed boolean - */ - public static boolean parseBoolean(JSONParser.ValueContext valueContext) { - return Boolean.parseBoolean(valueContext.getText()); - } - - /** - * Parse an integer from a JSONParser.PairContext - * - * @param pairContext the JSONParser.PairContext to parse - * @return the parsed integer - */ - public static int parseInteger(JSONParser.PairContext pairContext) { - return Integer.parseInt(pairContext.STRING().getText()); - } - - /** - * Parse a long from a JSONParser.PairContext - * - * @param pairContext the JSONParser.PairContext to parse - * @return the parsed long - */ - public static long parseLong(JSONParser.PairContext pairContext) { - return Long.parseLong(pairContext.STRING().getText()); - } - - /** - * Parse a float from a JSONParser.PairContext - * - * @param pairContext the JSONParser.PairContext to parse - * @return the parsed float - */ - public static float parseFloat(JSONParser.PairContext pairContext) { - return Float.parseFloat(pairContext.STRING().getText()); - } - - /** - * Parse a double from a JSONParser.PairContext - * - * @param pairContext the JSONParser.PairContext to parse - * @return the parsed double - */ - public static double parseDouble(JSONParser.PairContext pairContext) { - return Double.parseDouble(pairContext.STRING().getText()); - } + // To JSON String Methods /** - * Parse a boolean from a JSONParser.PairContext + * Object field to JSON string * - * @param pairContext the JSONParser.PairContext to parse - * @return the parsed boolean + * @param fieldName the name of the field + * @param value the value of the field */ - public static boolean parseBoolean(JSONParser.PairContext pairContext) { - return Boolean.parseBoolean(pairContext.STRING().getText()); + public static void field(@NonNull WritableSequentialData out, int indent, String fieldName, JsonCodec codec, @Nullable final T value) { + out.writeJsonString(toJsonFieldName(fieldName), true); + out.writeByte2(COLON, SPACE); + if (value != null) { + codec.write(value, out, 0, 2, true); // TODO replace indent.length with indent + } else { + out.writeByte4(n,u,l,l); + } } - // ==================================================================================================== - // To JSON String 
Methods - /** - * Base method for all field to JSON string methods call this with the string for the value. + * String field to JSON string * * @param fieldName the name of the field - * @param rawValue the JSON string for value - * @return the JSON string + * @param value the value of the field */ - private static String rawFieldCode(String fieldName, String rawValue) { - return '"' + fieldName + '"' + ": " + rawValue; + public static void field(@NonNull WritableSequentialData out, String fieldName, String value) { + out.writeJsonString(fieldName, true); + out.writeByte2(COLON, SPACE); + out.writeJsonString(value, true); } - + /** - * Object field to JSON string + * String field to JSON string * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String indent, String fieldName, JsonCodec codec, @Nullable final T value) { - if (value != null) { - return '"' + toJsonFieldName(fieldName) + '"' + ": " + codec.toJSON(value, indent, true); - } else { - return '"' + toJsonFieldName(fieldName) + '"' + ": null"; - } + public static void fieldUnquoted(@NonNull WritableSequentialData out, String fieldName, String value) { + out.writeJsonString(fieldName, true); + out.writeByte2(COLON, SPACE); + out.writeJsonString(value, false); } /** * String field to JSON string * * @param fieldName the name of the field - * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, String value) { - return rawFieldCode(fieldName, '"' + escape(value) + '"'); + public static void fieldNull(@NonNull WritableSequentialData out, String fieldName) { + out.writeJsonString(fieldName, true); + out.writeByte2(COLON, SPACE); + out.writeByte4(n,u,l,l); } /** @@ -296,10 +132,10 @@ public static String field(String fieldName, String value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Bytes value) { - return rawFieldCode(fieldName, '"' + value.toBase64() + '"'); + public static void field(@NonNull WritableSequentialData out, String fieldName, Bytes value) { + String rawValue = value.toBase64(); + field(out, fieldName, rawValue); } /** @@ -307,10 +143,10 @@ public static String field(String fieldName, Bytes value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, byte[] value) { - return rawFieldCode(fieldName, '"' + Base64.getEncoder().encodeToString(value) + '"'); + public static void field(@NonNull WritableSequentialData out, String fieldName, byte[] value) { + String rawValue = Base64.getEncoder().encodeToString(value); + field(out, fieldName, rawValue); } /** @@ -318,26 +154,29 @@ public static String field(String fieldName, byte[] value) { * * @param fieldName the name of the field * @param value the value of the field - * @param kEncoder an encoder of a key value to a string * @param vComposer a composer of a "key":value strings - basically, a JsonTools::field method for the value type - * @return the JSON string */ - public static String field( - String fieldName, Map value, Function kEncoder, BiFunction vComposer) { + public static void field(@NonNull WritableSequentialData out, + String fieldName, Map value, FieldFunction vComposer) { assert !value.isEmpty(); - StringBuilder sb = new StringBuilder(); + out.writeJsonString(fieldName, true); + 
out.writeByte4(COLON, SPACE, OPEN_OBJECT, NL); PbjMap pbjMap = (PbjMap) value; for (int i = 0; i < pbjMap.size(); i++) { if (i > 0) { - sb.append(",\n"); + out.writeByte2(COMMA, NL); } K k = pbjMap.getSortedKeys().get(i); V v = pbjMap.get(k); - String keyStr = kEncoder.apply(k); - sb.append(vComposer.apply(keyStr, v)); +// String keyStr = kEncoder.apply(k); // TODO: implement a key encoder + vComposer.write(out, k.toString(), v); // TODO pass in indent } - return rawFieldCode(fieldName, "{\n" + sb.toString().indent(4) + " }"); + out.writeByte(CLOSE_OBJECT); + } + + public interface FieldFunction { + void write(@NonNull WritableSequentialData out, String fieldName, T value); } /** @@ -345,10 +184,9 @@ public static String field( * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, boolean value) { - return rawFieldCode(fieldName, value ? "true" : "false"); + public static void field(@NonNull WritableSequentialData out, String fieldName, boolean value) { + fieldUnquoted(out, fieldName, value ? "true" : "false"); } /** @@ -356,10 +194,9 @@ public static String field(String fieldName, boolean value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, int value) { - return rawFieldCode(fieldName, Integer.toString(value)); + public static void field(@NonNull WritableSequentialData out, String fieldName, int value) { + fieldUnquoted(out, fieldName, Integer.toString(value)); } /** @@ -367,10 +204,9 @@ public static String field(String fieldName, int value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, long value) { - return rawFieldCode(fieldName, '"' + Long.toString(value) + '"'); + public static void field(@NonNull WritableSequentialData out, String fieldName, long value) { + field(out, fieldName, Long.toString(value)); } /** @@ -378,15 +214,14 @@ public static String field(String fieldName, long value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, float value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, float value) { if (Float.isNaN(value)) { - return rawFieldCode(fieldName, "\"NaN\""); + field(out, fieldName, "NaN"); } else if (Float.isInfinite(value)) { - return rawFieldCode(fieldName, "\"" + (value < 0 ? "-Infinity" : "Infinity") + "\""); + field(out, fieldName, value < 0 ? "-Infinity" : "Infinity"); } else { - return rawFieldCode(fieldName, Float.toString(value)); + field(out, fieldName, Float.toString(value)); } } @@ -395,15 +230,14 @@ public static String field(String fieldName, float value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, double value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, double value) { if (Double.isNaN(value)) { - return rawFieldCode(fieldName, "\"NaN\""); + field(out, fieldName, "NaN"); } else if (Double.isInfinite(value)) { - return rawFieldCode(fieldName, "\"" + (value < 0 ? "-Infinity" : "Infinity") + "\""); + field(out, fieldName, value < 0 ? 
"-Infinity" : "Infinity"); } else { - return rawFieldCode(fieldName, Double.toString(value)); + field(out, fieldName, Double.toString(value)); } } @@ -412,13 +246,12 @@ public static String field(String fieldName, double value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Boolean value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, Boolean value) { if (value == null) { - return rawFieldCode(fieldName, "null"); + fieldNull(out, fieldName); } else { - return rawFieldCode(fieldName, Boolean.toString(value)); + fieldUnquoted(out, fieldName, value.toString()); } } @@ -427,13 +260,12 @@ public static String field(String fieldName, Boolean value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Integer value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, Integer value) { if (value == null) { - return rawFieldCode(fieldName, "null"); + fieldNull(out, fieldName); } else { - return rawFieldCode(fieldName, Integer.toString(value)); + field(out, fieldName, value.intValue()); } } @@ -442,15 +274,14 @@ public static String field(String fieldName, Integer value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Long value, boolean quote) { + public static void field(@NonNull WritableSequentialData out, String fieldName, Long value, boolean quote) { if (value == null) { - return rawFieldCode(fieldName, "null"); + fieldNull(out, fieldName); } else if (quote) { - return rawFieldCode(fieldName, '\"' + Long.toString(value) + '\"'); + field(out, fieldName, Long.toString(value)); } else { - return rawFieldCode(fieldName, Long.toString(value)); + fieldUnquoted(out, fieldName, Long.toString(value)); } } @@ -459,13 +290,12 @@ public static String field(String fieldName, Long value, boolean quote) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Float value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, Float value) { if (value == null) { - return rawFieldCode(fieldName, "null"); + fieldNull(out, fieldName); } else { - return field(fieldName, value.floatValue()); + field(out, fieldName, value.floatValue()); } } @@ -474,13 +304,12 @@ public static String field(String fieldName, Float value) { * * @param fieldName the name of the field * @param value the value of the field - * @return the JSON string */ - public static String field(String fieldName, Double value) { + public static void field(@NonNull WritableSequentialData out, String fieldName, Double value) { if (value == null) { - return rawFieldCode(fieldName, "null"); + fieldNull(out, fieldName); } else { - return field(fieldName, value.doubleValue()); + field(out, fieldName, value.doubleValue()); } } @@ -489,41 +318,47 @@ public static String field(String fieldName, Double value) { * * @param fieldName the name of the field * @param items the items in the array - * @return the JSON string * @param the type of the items in the array */ - public static String arrayField(String fieldName, FieldDefinition fieldDefinition, List items) { + public static void arrayField(@NonNull WritableSequentialData out, String 
fieldName, FieldDefinition fieldDefinition, List items) { if (items != null) { if (items.isEmpty()) { - return rawFieldCode(fieldName, "[]"); + out.writeJsonString(fieldName, true); + out.writeByte4(COLON, SPACE, OPEN_ARRAY, CLOSE_ARRAY); } else { - String values = items.stream() - .map(item -> { - if (fieldDefinition.optional() && item == null) { - return "\"null\""; - } else { - return switch (fieldDefinition.type()) { - case STRING -> '"' + escape((String) item) + '"'; - case BYTES -> '"' + ((Bytes) item).toBase64() + '"'; - case INT32, SINT32, UINT32, FIXED32, SFIXED32 -> Integer.toString((Integer) item); - case INT64, SINT64, UINT64, FIXED64, SFIXED64 -> '"' - + Long.toString((Long) item) - + '"'; - case FLOAT -> Float.toString((Float) item); - case DOUBLE -> Double.toString((Double) item); - case BOOL -> Boolean.toString((Boolean) item); - case ENUM -> '"' + ((EnumWithProtoMetadata) item).protoName() + '"'; - case MESSAGE -> throw new UnsupportedOperationException( - "No expected here should have called other arrayField() method"); - case MAP -> throw new UnsupportedOperationException("Arrays of maps not supported"); - }; - } - }) - .collect(Collectors.joining(", ")); - return rawFieldCode(fieldName, "[" + values + "]"); + out.writeJsonString(fieldName, true); + out.writeByte3(COLON, SPACE, OPEN_ARRAY); + boolean isFirst = true; + for (int i = 0; i < items.size(); i++) { + final T item = items.get(i); + if (isFirst) { + isFirst = false; + } else { + out.writeByte2(COMMA, SPACE); + } + if (fieldDefinition.optional() && item == null) { + out.writeByte4(n,u,l,l); + } else { + switch (fieldDefinition.type()) { + case STRING -> out.writeJsonString((String) item, true); + case BYTES -> out.writeJsonString(((Bytes) item).toBase64(), true); + case INT32, SINT32, UINT32, FIXED32, SFIXED32 -> + out.writeJsonString(Integer.toString((Integer) item), false); + case INT64, SINT64, UINT64, FIXED64, SFIXED64 -> + out.writeJsonString(Long.toString((Long) item), true); + case FLOAT -> out.writeJsonString(Float.toString((Float) item), true); + case DOUBLE -> out.writeJsonString(Double.toString((Double) item), true); + case BOOL -> out.writeJsonString(Boolean.toString((Boolean) item), false); + case ENUM -> out.writeJsonString(((EnumWithProtoMetadata) item).protoName(), true); + case MESSAGE -> throw new UnsupportedOperationException( + "No expected here should have called other arrayField() method"); + case MAP -> throw new UnsupportedOperationException("Arrays of maps not supported"); + } + } + } + out.writeByte(CLOSE_ARRAY); } } - return null; } /** @@ -533,26 +368,24 @@ public static String arrayField(String fieldName, FieldDefinition fieldDefin * @param fieldName the name of the field * @param codec the codec to use for encoding the items * @param items the items in the array - * @return the JSON string * @param the type of the items in the array */ - public static String arrayField(String indent, String fieldName, JsonCodec codec, List items) { + public static void arrayField(@NonNull WritableSequentialData out, int indent, String fieldName, JsonCodec codec, List items) { if (items != null) { if (items.isEmpty()) { - return rawFieldCode(fieldName, "[]"); + fieldUnquoted(out, fieldName, "[]"); } else { - StringBuilder code = new StringBuilder('"' + fieldName + '"' + ": ["); + out.writeJsonString(toJsonFieldName(fieldName), true); + out.writeByte3(COLON, SPACE, OPEN_ARRAY); for (int i = 0; i < items.size(); i++) { var item = items.get(i); - code.append(codec.toJSON(item, indent, true)); + 
codec.write(item, out, indent, 2, true); // TODO replace 0, 2 with indent if (i < items.size() - 1) { - code.append(", "); + out.writeByte2(COMMA, SPACE); } } - code.append("]"); - return code.toString(); + out.writeByte(CLOSE_ARRAY); } } - return null; } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java index f5ef0fe7..103d02da 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java @@ -222,12 +222,30 @@ default int writeBytes(@NonNull final InputStream src, final int maxLength) thro /** * Write a string as UTF8 bytes to this {@link WritableSequentialData}. * - * @param value The string to write, can not be null + * @param value The string to write, cannot be null */ default void writeUTF8(@NonNull final String value) { writeBytes(value.getBytes(StandardCharsets.UTF_8)); } + /** + * Write a string as UTF8 bytes with JSON escapes to this {@link WritableSequentialData}. + * + * @param value The string to write, cannot be null + */ + default void writeJsonString(@NonNull final String value, boolean quoted) { + String escaped = value + .replaceAll("\\\\", "\\\\") + .replaceAll("\"", "\\\"") + .replaceAll("\n", "\\n") + .replaceAll("\r", "\\r") + .replaceAll("\t", "\\t") + .replaceAll("\f", "\\f") + .replaceAll("\b", "\\b"); + if (quoted) escaped = '"' + escaped + '"'; + writeBytes(escaped.getBytes(StandardCharsets.UTF_8)); + } + /** * Writes four bytes containing the given int value, in the standard Java big-endian byte order, at the current * {@link #position()}, and then increments the {@link #position()} by four. diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java index ab2b6b28..1d315d6a 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java @@ -9,6 +9,7 @@ import java.io.UncheckedIOException; import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Objects; @@ -18,6 +19,7 @@ * direct array reads / writes. 
*/ final class ByteArrayBufferedData extends BufferedData { + private static final byte[] HEX = "0123456789abcdef".getBytes(StandardCharsets.US_ASCII); // Backing byte array private final byte[] array; @@ -381,4 +383,53 @@ public int writeBytes(@NonNull final InputStream src, final int maxLength) { throw new UncheckedIOException(e); } } + + @Override + public void writeJsonString(@NonNull String value, boolean quoted) { + int offset = buffer.position(); + final int len = value.length(); + validateCanWrite(len + 2); // TODO this is not really correct + if(quoted) array[offset++] = '"'; + for (int i = 0; i < len; i++) { + char c = value.charAt(i); + + // Escape control chars and JSON specials + switch (c) { + case '"': array[offset++] = '\\'; array[offset++] = '"'; continue; + case '\\': array[offset++] = '\\'; array[offset++] = '\\'; continue; + case '\b': array[offset++] = '\\'; array[offset++] = 'b'; continue; + case '\f': array[offset++] = '\\'; array[offset++] = 'f'; continue; + case '\n': array[offset++] = '\\'; array[offset++] = 'n'; continue; + case '\r': array[offset++] = '\\'; array[offset++] = 'r'; continue; + case '\t': array[offset++] = '\\'; array[offset++] = 't'; continue; + } + + if (c < 0x20) { + // Control character – use \ u00XX + array[offset++] = '\\'; + array[offset++] = 'u'; + array[offset++] = '0'; + array[offset++] = '0'; + array[offset++] = HEX[c >> 4]; + array[offset++] = HEX[c & 0xF]; + } else if (c < 0x80) { + array[offset++] = (byte) c; + } else if (c < 0x800) { + array[offset++] = (byte) (0b11000000 | (c >> 6)); + array[offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } else if (Character.isSurrogate(c)) { + int cp = Character.toCodePoint(c, value.charAt(++i)); + array[offset++] = (byte) (0b11110000 | (cp >> 18)); + array[offset++] = (byte) (0b10000000 | ((cp >> 12) & 0b00111111)); + array[offset++] = (byte) (0b10000000 | ((cp >> 6) & 0b00111111)); + array[offset++] = (byte) (0b10000000 | (cp & 0b00111111)); + } else { + array[offset++] = (byte) (0b11100000 | (c >> 12)); + array[offset++] = (byte) (0b10000000 | ((c >> 6) & 0b00111111)); + array[offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } + } + if(quoted) array[offset++] = '"'; + buffer.position(offset); + } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java new file mode 100644 index 00000000..a2749840 --- /dev/null +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java @@ -0,0 +1,635 @@ +package com.hedera.pbj.runtime.json; + +import com.hedera.pbj.runtime.ParseException; +import com.hedera.pbj.runtime.io.ReadableSequentialData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.runtime.io.buffer.RandomAccessData; +import java.nio.charset.StandardCharsets; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Supplier; + +/** + * A simple JSON lexer that reads from a {@link ReadableSequentialData} and provides read and consume methods. It is + * designed to avoid looking ahead in the input stream, and instead reads the next byte when needed. It reads data types + * based on the protobuf JSON format, including strings, numbers, booleans, and null values. It aims to be as fast as + * possible and avoid object allocation. + * + *

This class is not thread-safe and should be used by a single thread.
+ *
+ * <p>Table from Protobuf Docs of JSON Mapping of PB types:
+ * <table>
+ * <tr><th>Protobuf</th><th>JSON</th><th>JSON example</th><th>Notes</th></tr>
+ * <tr><td>message</td><td>object</td><td>{"fooBar": v, "g": null, ...}</td><td>Generates JSON objects.
+ * Message field names are mapped to lowerCamelCase and become JSON object keys. If the json_name field
+ * option is specified, the specified value will be used as the key instead. Parsers accept both the lowerCamelCase
+ * name (or the one specified by the json_name option) and the original proto field name.
+ * null is an accepted value for all field types and treated as the default value of the corresponding
+ * field type. However, null cannot be used for the json_name value. For more on why, see
+ * Stricter validation for json_name.</td></tr>
+ * <tr><td>enum</td><td>string</td><td>"FOO_BAR"</td><td>The name of the enum value as specified in
+ * proto is used. Parsers accept both enum names and integer values.</td></tr>
+ * <tr><td>map&lt;K,V&gt;</td><td>object</td><td>{"k": v, ...}</td><td>All keys are converted to strings.</td></tr>
+ * <tr><td>repeated V</td><td>array</td><td>[v, ...]</td><td>null is accepted as the empty
+ * list [].</td></tr>
+ * <tr><td>bool</td><td>true, false</td><td>true, false</td><td></td></tr>
+ * <tr><td>string</td><td>string</td><td>"Hello World!"</td><td></td></tr>
+ * <tr><td>bytes</td><td>base64 string</td><td>"YWJjMTIzIT8kKiYoKSctPUB+"</td><td>JSON value will be the data
+ * encoded as a string using standard base64 encoding with paddings. Either standard or URL-safe base64 encoding
+ * with/without paddings is accepted.</td></tr>
+ * <tr><td>int32, fixed32, uint32</td><td>number</td><td>1, -10, 0</td><td>JSON value will be a decimal
+ * number. Either numbers or strings are accepted. Empty strings are invalid.</td></tr>
+ * <tr><td>int64, fixed64, uint64</td><td>string</td><td>"1", "-10"</td><td>JSON value will be a
+ * decimal string. Either numbers or strings are accepted. Empty strings are invalid.</td></tr>
+ * <tr><td>float, double</td><td>number</td><td>1.1, -10.0, 0, "NaN", "Infinity"</td><td>JSON value
+ * will be a number or one of the special string values "NaN", "Infinity", and "-Infinity". Either numbers or
+ * strings are accepted. Empty strings are invalid. Exponent notation is also accepted.</td></tr>
+ * <tr><td>Any</td><td>object</td><td>{"@type": "url", "f": v, ... }</td><td>If the
+ * Any contains a value that has a special JSON mapping, it will be converted as follows:
+ * {"@type": xxx, "value": yyy}. Otherwise, the value will be converted into a JSON object, and the
+ * "@type" field will be inserted to indicate the actual data type.</td></tr>
+ * <tr><td>Timestamp</td><td>string</td><td>"1972-01-01T10:00:20.021Z"</td><td>Uses RFC 3339, where
+ * generated output will always be Z-normalized and uses 0, 3, 6 or 9 fractional digits. Offsets other than "Z" are
+ * also accepted.</td></tr>
+ * <tr><td>Duration</td><td>string</td><td>"1.000340012s", "1s"</td><td>Generated output always
+ * contains 0, 3, 6, or 9 fractional digits, depending on required precision, followed by the suffix "s". Accepted
+ * are any fractional digits (also none) as long as they fit into nanosecond precision and the suffix "s" is
+ * required.</td></tr>
+ * <tr><td>Struct</td><td>object</td><td>{ ... }</td><td>Any JSON object. See struct.proto.</td></tr>
+ * <tr><td>Wrapper types</td><td>various types</td><td>2, "2", "foo",
+ * true, "true", null, 0, ...</td><td>Wrappers use the same representation in JSON as the wrapped primitive
+ * type, except that null is allowed and preserved during data conversion and transfer.</td></tr>
+ * <tr><td>FieldMask</td><td>string</td><td>"f.fooBar,h"</td><td>See field_mask.proto.</td></tr>
+ * <tr><td>ListValue</td><td>array</td><td>[foo, bar, ...]</td><td></td></tr>
+ * <tr><td>Value</td><td>value</td><td></td><td>Any JSON value. Check google.protobuf.Value for details.</td></tr>
+ * <tr><td>NullValue</td><td>null</td><td></td><td>JSON null</td></tr>
+ * <tr><td>Empty</td><td>object</td><td>{}</td><td>An empty JSON object</td></tr>
+ * </table>
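+ *
+ * <p>A minimal usage sketch (illustration only, mirroring the unit tests added in this change; not a
+ * normative contract):
+ * <pre>{@code
+ * // data is a ReadableSequentialData positioned at the JSON input (assumed), e.g. {"name": "Alice", "age": 30}
+ * JsonLexer lexer = new JsonLexer(data);
+ * lexer.openObject();
+ * String field = lexer.readString();    // "name"
+ * lexer.consumeColon();
+ * String name = lexer.readString();     // "Alice"
+ * lexer.consumeComma();
+ * field = lexer.readString();           // "age"
+ * lexer.consumeColon();
+ * long age = lexer.readSignedInteger(); // 30
+ * lexer.closeObject();
+ * }</pre>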
+ */ +public final class JsonLexer { + + private static final int SPACE = 0x20; // Space + private static final int HORIZONTAL_TAB = 0x09; // Horizontal tab + private static final int LINE_FEED = 0x0A; // Line feed or New line + private static final int CARRIAGE_RETURN = 0x0D; // Carriage return + private static final int QUOTE = 0x22; // Double quote + private static final int N = 0x6E; // 'n' + + /** the ReadableSequentialData to read from */ + private final ReadableSequentialData data; + /** scratch space for raw UTF-8 bytes between escapes */ + private byte[] buf = new byte[64]; + /** number of bytes in the buffer buf */ + private int count; + + private boolean hasNextCharRead = false; + private int nextCharRead; + + /** + * Construct a new JsonLexer, this is not thread safe and should be used by a single thread. + * + * @param data the ReadableSequentialData to read from + */ + public JsonLexer(ReadableSequentialData data) { + this.data = data; + } + + private int readByte() { + if (hasNextCharRead) { + hasNextCharRead = false; + return nextCharRead; + } + return data.readByte(); + } + + private void setNextCharRead(int nextCharRead) { + this.nextCharRead = nextCharRead; + hasNextCharRead = true; + } + + /** + * Consume JSON whitespace from the input data. + */ + public void consumeWhiteSpace() { + int c = readByte(); // we have always used "nextCharRead" if one was set + while (c == SPACE || c == HORIZONTAL_TAB || c == LINE_FEED || c == CARRIAGE_RETURN) { + c = data.readByte(); + } + setNextCharRead(c); + } + + /** + * Consume JSON whitespace from the input data at the end of the file checking for end of stream + */ + public void consumeWhiteSpaceEnd() { + int c; + if (hasNextCharRead) { + hasNextCharRead = false; + c = nextCharRead; + } else if (data.hasRemaining()){ + c = data.readByte(); + } else { // end of stream + return; + } + while (c == SPACE || c == HORIZONTAL_TAB || c == LINE_FEED || c == CARRIAGE_RETURN) { + if (!data.hasRemaining()) { + return; + } + c = data.readByte(); + } + setNextCharRead(c); + } + + public void openObject() throws ParseException { + consumeWhiteSpaceEnd(); + final int c = readByte(); + if (c != '{') { + throw new ParseException(createParseExceptionMessage(c, '{')); + } + consumeWhiteSpace(); + } + + public void openArray() throws ParseException { + consumeWhiteSpace(); + final int c = readByte(); + if (c != '[') { + throw new ParseException(createParseExceptionMessage(c, '[')); + } + consumeWhiteSpace(); + } + + public void closeObject() throws ParseException { + consumeWhiteSpace(); + final int c = readByte(); + if (c != '}') { + throw new ParseException(createParseExceptionMessage(c, '}')); + } + consumeWhiteSpaceEnd(); + } + + public void consumeColon() throws ParseException { + consumeWhiteSpace(); + final int c = readByte(); + if (c != ':') { + throw new ParseException(createParseExceptionMessage(c, ':')); + } + consumeWhiteSpace(); + } + + public void consumeComma() throws ParseException { + consumeWhiteSpace(); + final int c = readByte(); + if (c != ',') { + throw new ParseException(createParseExceptionMessage(c, ',')); + } + consumeWhiteSpace(); + } + + /** + * Creates a parse exception message with the given character and expected characters. With other helpful + * information to aid debugging. + * + * @param c the character that was read + * @param expected the expected characters + * @return the parse exception message + */ + private String createParseExceptionMessage(int c, char ... 
expected) { + StringBuilder sb = new StringBuilder(); + sb.append("Expected "); + for (int i = 0; i < expected.length; i++) { + sb.append("'").append(expected[i]).append("'"); + if (i < expected.length - 1) { + sb.append(", "); + } + } + sb.append(", got: '"); + switch(c) { + case '\n' -> sb.append("\\n"); + case '\r' -> sb.append("\\r"); + case '\t' -> sb.append("\\t"); + case '\b' -> sb.append("\\b"); + case '\f' -> sb.append("\\f"); + default -> sb.append((char) c); + } + sb.append("' at position: ").append(data.position()-1); + sb.append(" remaining: ").append(data.remaining()); + if (data instanceof RandomAccessData randomAccessData) { + // print the 30 bytes around the current position on a line with a "^" under the current position + StringBuilder sb2 = new StringBuilder(); + for (int i = -15; i < 15; i++) { + long pos = data.position() + i; + if (pos >= 0 && pos < data.limit()) { + sb2.append((char) randomAccessData.getByte(pos)); + } + } + sb.append("\n JSON surrounding ▶"); + sb.append(sb2.toString() + .replace('\n', '↩') + .replace('\r', '↵') + .replace('\t', '↦') + .replace(' ', '␣')); + sb.append("\n Current position ▶"); + sb.append(" ".repeat((int)Math.min(15, Math.max(0,data.position()-15)))); + sb.append(" ^"); + } + return sb.toString(); + } + + /** + * Checks if the next character is a closing object or closing array or a comma. If it is a closing object or array, + * it returns false. + * + * @return true if there is a next field or false if the object or array is closed + * @throws ParseException if the input is not a valid JSON object + */ + public boolean nextFieldOrClose() throws ParseException { + consumeWhiteSpace(); + final int c = readByte(); + switch (c) { + case '}', ']' -> { + consumeWhiteSpaceEnd(); + return false; + } + case ',' -> { + consumeWhiteSpace(); + return true; + } + default -> throw new ParseException(createParseExceptionMessage(c, '}', ']', ',')); + } + } + + /** + * Parse a JSON string in a SequentialData. Also handles JSON null. + * + * @return Java String or null if the input was null + * @throws ParseException if the input is not a valid JSON string or null + */ + public String readString() throws ParseException { + final int firstChar = readByte(); + switch (firstChar) { + case '}', ']' -> { + return null; // we have a closing object so return null + } + case N -> { + data.readByte(); // consume 'u', we have always used "nextCharRead" if one was set + data.readByte(); // consume 'l' + data.readByte(); // consume 'l' + return null; // consume 'l' + } + case QUOTE -> { + return readJsonString(); + } + default -> throw new ParseException(createParseExceptionMessage(firstChar, '"', 'N')); + } + } + + /** + * Parse a JSON boolean in a SequentialData. It can be true, false, "true", + * or "false". 
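+ * For example, the inputs {@code true}, {@code false}, {@code "true"} and {@code "false"} are all accepted,
+ * and the quoted and unquoted forms parse to the same value (illustration; see the unit tests in this change).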
+ * + * @return Java boolean + * @throws ParseException if the input is not a valid JSON boolean + */ + public boolean readBoolean() throws ParseException { + int firstChar = readByte(); + boolean isString = false; + if (firstChar == QUOTE) { + isString = true; + // we have a number a string so jump over the quote + firstChar = data.readByte(); + } + if (firstChar == 't') { + data.readByte(); // consume 'r', we have always used "nextCharRead" if one was set + data.readByte(); // consume 'u' + data.readByte(); // consume 'e' + if (isString) checkClosingQuote(data.readByte()); + return true; + } else if (firstChar == 'f') { + data.readByte(); // consume 'a', we have always used "nextCharRead" if one was set + data.readByte(); // consume 'l' + data.readByte(); // consume 's' + data.readByte(); // consume 'e' + if (isString) checkClosingQuote(data.readByte()); + return false; + } else { + throw new ParseException(createParseExceptionMessage(firstChar, 't', 'f')); + } + } + + /** + * Parse a Base64 encoded bytes as a JSON String in a SequentialData. + * + * @return Bytes read + * @throws ParseException if the input is not a valid base 64 bytes + */ + public Bytes readBytes() throws ParseException { + final int firstChar = readByte(); + if (firstChar == QUOTE) { + return Bytes.fromBase64(readJsonString()); // TODO probably a faster way to do this + } else { + throw new ParseException(createParseExceptionMessage(firstChar, '"')); + } + } + + /** + * Reads a signed long integer from the input data. If you want 32bit integer, then you need to cast it to int. + * + * @return the signed integer value + * @throws ParseException if the input is not a valid JSON signed integer + */ + public long readSignedInteger() throws ParseException { + int firstChar = readByte(); + boolean isString = false; + if (firstChar == QUOTE) { + isString = true; + // we have a number a string so jump over the quote + firstChar = data.readByte(); + } + long negative = 1; + if (firstChar == '-') { + negative = -1; + firstChar = data.readByte(); // read next character + } else if (firstChar == '0') { // fast path for zero + if (isString) checkClosingQuote(data.readByte()); + return 0; + } + if (firstChar < '1' || firstChar > '9') { + throw new ParseException( + createParseExceptionMessage(firstChar, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')); + } + long result = firstChar - '0'; // first digit + while (true) { + final int c = data.readByte(); + if (c < '0' || c > '9') { // check if not a digit + checkClosingQuote(c); + break; + } + result = result * 10 + (c - '0'); + } + return negative * result; + } + + /** + * Reads a double from the input data. The double can be either a string or a number. It can also be one of the + * special values "NaN", "Infinity", or "-Infinity". 
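+ * For example, {@code 1.25e2}, {@code "125.0"} and {@code "Infinity"} are all accepted inputs; the first two
+ * parse to {@code 125.0} (illustrative values based on the parsing logic below).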
+ * + * @return the double value + * @throws ParseException if the input is not a valid JSON double + */ + public double readDouble() throws ParseException { + int c = readByte(); + boolean isString = false; + if (c == QUOTE) { + isString = true; + // we have a number a string so jump over the quote + c = data.readByte(); + } + boolean isNegative = false; + switch (c) { + case '-' -> { + isNegative = true; + c = data.readByte(); + } + case 'N' -> { + data.readByte(); // consume 'a', we have always used "nextCharRead" if one was set + data.readByte(); // consume 'N' + if (isString) checkClosingQuote(data.readByte()); + return Double.NaN; + } + } + // handle "Infinity" and "-Infinity" + if (c == 'I') { + data.skip(7); // consume 'n', 'f', 'i', 'n', 'i', 't', 'y' TODO does this need to be checked? + if (isString) checkClosingQuote(data.readByte()); + return isNegative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; + } + + // First digit + if (c < '0' || c > '9') { + throw new ParseException(createParseExceptionMessage(c, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')); + } + + // For very large numbers or ones requiring exact precision (like Double.MAX_VALUE), + // we need to use a string-based approach + StringBuilder sb = new StringBuilder(32); + if (isNegative) { + sb.append('-'); + } + sb.append((char) c); // add first digit + + boolean hasDecimalPoint = false; + boolean hasExponent = false; + + while (true) { + c = data.readByte(); + + // Handle decimal point + if (c == '.') { + if (hasDecimalPoint) { + throw new ParseException("Unexpected second decimal point in number"); + } + hasDecimalPoint = true; + sb.append('.'); + continue; + } + + // Handle digits + if (c >= '0' && c <= '9') { + sb.append((char) c); + continue; + } + + // Handle exponent notation + if (c == 'e' || c == 'E') { + if (hasExponent) { + throw new ParseException("Unexpected second exponent in number"); + } + hasExponent = true; + sb.append('E'); + + // Handle exponent sign + c = data.readByte(); + if (c == '-' || c == '+') { + sb.append((char) c); + c = data.readByte(); + } + + // First digit of exponent must be a digit + if (c < '0' || c > '9') { + throw new ParseException("Expected digit after exponent, got: " + (char) c); + } + + sb.append((char) c); + continue; + } + + // End of number + if (isString) { + checkClosingQuote(c); + } else { + setNextCharRead(c); + } + + // Use Double.parseDouble for precise handling of all values including extreme cases + try { + return Double.parseDouble(sb.toString()); + } catch (NumberFormatException e) { + throw new ParseException("Invalid double value: " + sb); + } + } + } + + /** + * Checks if the closing quote is present. If not, it sets the next character to be read. + * + * @param c the character to check + */ + private void checkClosingQuote(int c) { + // check for closing quote + if (c != QUOTE) {// we don't have a number a string so keep last char + setNextCharRead(c); + } + } + + /** + * Reads a protobuf enum value from the input data. The enum value can be either a string or an integer. 
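+ * For example, for an enum with constants {@code FIRST, SECOND, THIRD}, both the string {@code "SECOND"} and
+ * the number {@code 1} resolve to {@code SECOND} with the current implementation, which treats the number as
+ * the constant's ordinal (illustration only).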
+ * + * @param enumClass the enum class to read + * @return the enum value + * @param the enum type + * @throws ParseException if the input is not a valid JSON enum value + */ + public > E readEnum(Class enumClass) throws ParseException { + final int firstChar = readByte(); + if (firstChar == QUOTE) { + return Enum.valueOf(enumClass, readJsonString()); + } else { + setNextCharRead(firstChar); + long res; + int firstChar1 = readByte(); + if (firstChar1 == QUOTE) { + // we have a number a string so jump over the quote + firstChar1 = data.readByte(); + } + if (firstChar1 == '0') { + res = 0;// fast path for zero + } else { + if (firstChar1 < '1' || firstChar1 > '9') { + throw new ParseException( + createParseExceptionMessage(firstChar1, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9')); + } + long result = firstChar1 - '0'; // first digit + while (true) { + final int c = data.readByte(); + if (c < '0' || c > '9') { // check if not a digit + checkClosingQuote(c); + res = result; + break; + } + result = result * 10 + (c - '0'); + } + } + final int ordinal = (int) res; + final E[] enumConstants = enumClass.getEnumConstants(); + if (ordinal < 0 || ordinal >= enumConstants.length) { + throw new ParseException("Invalid enum ordinal: " + ordinal); + } + return enumConstants[ordinal]; + } + } + + public Map readMap(Supplier keySupplier, Supplier valueSupplier) throws ParseException { + openObject(); + Map map = Map.of(); + boolean isFirst = true; + while (true) { + if (isFirst) { + isFirst = false; + } else if (!nextFieldOrClose()) { + break; + } + // read field name and colon + final String fieldName = readString(); + if (fieldName == null) break;// there are no fields or no more fields + consumeColon(); + // read and handle field value + K key = keySupplier.get(); + V value = valueSupplier.get(); + map.put(key, value); + } + return map; + } + + /* ----- Private Methods ------------------------------------------------------------- */ + + /** called by the parser – the opening quote has already been consumed */ + private String readJsonString() throws ParseException { + StringBuilder out = new StringBuilder(32); // final result + + while (true) { + int b = data.readByte(); // your I/O source + if (b == '"') { // closing quote + flushUtf8Chunk(out); + return out.toString(); // done + } + if (b != '\\') { // fast-path: plain byte + ensureCapacity(1); + buf[count++] = (byte) b; + continue; + } + + /* ---- slow-path: we hit an escape sequence ---- */ + flushUtf8Chunk(out); // decode bytes seen so far + int esc = data.readByte(); + switch (esc) { + case '"': out.append('"'); break; + case '\\': out.append('\\'); break; + case '/': out.append('/'); break; + case 'b': out.append('\b'); break; + case 'f': out.append('\f'); break; + case 'n': out.append('\n'); break; + case 'r': out.append('\r'); break; + case 't': out.append('\t'); break; + case 'u': // \ uXXXX + out.append(readUnicodeEscape()); + break; + default: + throw new ParseException("Invalid escape: \\" + (char) esc); + } + } + } + + /** decode any buffered UTF-8 bytes and append to the StringBuilder */ + private void flushUtf8Chunk(StringBuilder out) throws ParseException { + if (count == 0) return; + out.append(new String(buf, 0, count, StandardCharsets.UTF_8)); + count = 0; + } + + /** grow the buffer when necessary (amortised O(1)) */ + private void ensureCapacity(int needed) { + int required = count + needed; + if (required > buf.length) { + byte[] bigger = new byte[Math.max(required, buf.length * 2)]; + System.arraycopy(buf, 0, bigger, 0, 
count); + buf = bigger; + } + } + + /** parse four hex digits after \ u and return the resulting code unit */ + private char readUnicodeEscape() throws ParseException { + int cp = 0; + for (int i = 0; i < 4; i++) { + cp = (cp << 4) | hexValue(data.readByte()); + } + return (char) cp; + } + + private static int hexValue(int ch) throws ParseException { + if ('0' <= ch && ch <= '9') return ch - '0'; + else if ('a' <= ch && ch <= 'f') return 10 + ch - 'a'; + else if ('A' <= ch && ch <= 'F') return 10 + ch - 'A'; + throw new ParseException("Bad \\u escape digit: " + (char) ch); + } +} diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/test/CharBufferToWritableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/test/CharBufferToWritableSequentialData.java index 758e6421..f94ff8d4 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/test/CharBufferToWritableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/test/CharBufferToWritableSequentialData.java @@ -52,7 +52,7 @@ public void skip(long count) { @Override public void writeByte(byte b) throws UncheckedIOException { - throw new UnsupportedOperationException(); + charBuffer.put((char) b); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/module-info.java b/pbj-core/pbj-runtime/src/main/java/module-info.java index 63d5f648..1158e750 100644 --- a/pbj-core/pbj-runtime/src/main/java/module-info.java +++ b/pbj-core/pbj-runtime/src/main/java/module-info.java @@ -11,5 +11,6 @@ exports com.hedera.pbj.runtime.io.stream; exports com.hedera.pbj.runtime.io.buffer; exports com.hedera.pbj.runtime.jsonparser; + exports com.hedera.pbj.runtime.json; exports com.hedera.pbj.runtime.grpc; } diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/buffer/BufferedDataTestBase.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/buffer/BufferedDataTestBase.java index 6e375c1f..3d745a21 100644 --- a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/buffer/BufferedDataTestBase.java +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/buffer/BufferedDataTestBase.java @@ -68,7 +68,7 @@ void toStringIsSafe() { buf.skip(5); buf.limit(10); - assertThat(buf.toString()).endsWith("BufferedData[1,2,3,4,5,6,7,8,9,10]"); + assertThat(buf.toString()).endsWith(buf.getClass().getSimpleName()+"[1,2,3,4,5,6,7,8,9,10]"); assertEquals(5, buf.position()); assertEquals(10, buf.limit()); @@ -78,15 +78,15 @@ void toStringIsSafe() { void toStringWithOffsetAndLen() { final var buf = wrap(new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, 4); // toString() doesn't depend on position, but respects limit - assertThat(buf.toString()).endsWith("BufferedData[0,1,2,3,4,5]"); + assertThat(buf.toString()).endsWith(buf.getClass().getSimpleName()+"[0,1,2,3,4,5]"); buf.limit(10); - assertThat(buf.toString()).endsWith("BufferedData[0,1,2,3,4,5,6,7,8,9]"); + assertThat(buf.toString()).endsWith(buf.getClass().getSimpleName()+"[0,1,2,3,4,5,6,7,8,9]"); } @Test void toStringWithSlice() { final var buf = wrap(new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}).slice(2, 4); - assertThat(buf.toString()).endsWith("BufferedData[2,3,4,5]"); + assertThat(buf.toString()).endsWith(buf.getClass().getSimpleName()+"[2,3,4,5]"); } @ParameterizedTest diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java new file mode 100644 index 00000000..0bdf12ce --- 
/dev/null +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java @@ -0,0 +1,355 @@ +package com.hedera.pbj.runtime.json; + +import static org.junit.jupiter.api.Assertions.*; + +import com.hedera.pbj.runtime.ParseException; +import com.hedera.pbj.runtime.io.ReadableSequentialData; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import java.nio.charset.StandardCharsets; +import java.text.DecimalFormat; +import java.util.Base64; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; + +public class JsonLexerTest { + + // Test enum for testing readEnum method + private enum TestEnum { + FIRST, + SECOND, + THIRD + } + + @Test + void simpleTest() throws Exception { + String json = """ + { + "name": "Alice", + "age": 30 + } + """; + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + String fieldName1 = lexer.readString(); + assertEquals("name", fieldName1); + lexer.consumeColon(); + String fieldValue1 = lexer.readString(); + assertEquals("Alice", fieldValue1); + lexer.consumeComma(); + + String fieldName2 = lexer.readString(); + lexer.consumeColon(); + assertEquals("age", fieldName2); + long fieldValue2 = lexer.readSignedInteger(); + assertEquals(30, fieldValue2); + lexer.closeObject(); + } + + @ParameterizedTest + @ValueSource(longs = {0,1,10,-1, -10, Integer.MAX_VALUE, Integer.MIN_VALUE, Long.MAX_VALUE, Long.MIN_VALUE}) + void integerTest(long value) throws Exception { + String json = """ + { + "number": "$value" + } + """.replace("$value", String.valueOf(value)); + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + String fieldName1 = lexer.readString(); + assertEquals("number", fieldName1); + lexer.consumeColon(); + long readValue = lexer.readSignedInteger(); + assertEquals(value, readValue); + lexer.closeObject(); + } + + @Test + void booleanTest() throws Exception { + String json = """ + { + "trueValue": true, + "falseValue": false + } + """; + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName1 = lexer.readString(); + assertEquals("trueValue", fieldName1); + lexer.consumeColon(); + boolean trueValue = lexer.readBoolean(); + assertTrue(trueValue); + lexer.consumeComma(); + + String fieldName2 = lexer.readString(); + assertEquals("falseValue", fieldName2); + lexer.consumeColon(); + boolean falseValue = lexer.readBoolean(); + assertFalse(falseValue); + + lexer.closeObject(); + } + + private static final DecimalFormat doubleFormat = new DecimalFormat("0.###############################"); + + @ParameterizedTest + @ValueSource(doubles = {0,1.123,10.1234,-1.1234, -10.1234, Double.MAX_VALUE, Double.MIN_VALUE, Double.NaN}) + void doubleTest(double value) throws Exception { + String json = """ + { + "numberStr": "$value", + "number": $value + } + """.replace("$value", Double.toString(value)); + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + { + String fieldName = lexer.readString(); + assertEquals("numberStr", fieldName); + lexer.consumeColon(); + double readValue = lexer.readDouble(); + assertEquals(value, readValue, 0.000001, + "Expected: " + Double.toString(value) + ",\n" + + "but got : " + Double.toString(readValue) + "\n" + + "json:" + json); + } + lexer.consumeComma(); + { + String fieldName = 
lexer.readString(); + assertEquals("number", fieldName); + lexer.consumeColon(); + double readValue = lexer.readDouble(); + assertEquals(value, readValue, 0.000001); + } + lexer.closeObject(); + } + + @Test + void doubleSpecialValuesTest() throws Exception { + String json = """ + { + "nan": "NaN", + "infinity": "Infinity", + "negativeInfinity": "-Infinity" + } + """; + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName1 = lexer.readString(); + assertEquals("nan", fieldName1); + lexer.consumeColon(); + double nanValue = lexer.readDouble(); + assertTrue(Double.isNaN(nanValue)); + lexer.consumeComma(); + + String fieldName2 = lexer.readString(); + assertEquals("infinity", fieldName2); + lexer.consumeColon(); + double infinityValue = lexer.readDouble(); + assertTrue(Double.isInfinite(infinityValue) && infinityValue > 0); + lexer.consumeComma(); + + String fieldName3 = lexer.readString(); + assertEquals("negativeInfinity", fieldName3); + lexer.consumeColon(); + double negInfinityValue = lexer.readDouble(); + assertTrue(Double.isInfinite(negInfinityValue) && negInfinityValue < 0); + + lexer.closeObject(); + } + + @Test + void bytesTest() throws Exception { + byte[] testData = "Hello, world!".getBytes(StandardCharsets.UTF_8); + String base64Data = Base64.getEncoder().encodeToString(testData); + + String json = """ + { + "data": "$value" + } + """.replace("$value", base64Data); + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + String fieldName = lexer.readString(); + assertEquals("data", fieldName); + lexer.consumeColon(); + Bytes readValue = lexer.readBytes(); + + assertArrayEquals(testData, readValue.toByteArray()); + + lexer.closeObject(); + } + + @Test + void enumTest() throws Exception { + String json = """ + { + "enumByName": "SECOND", + "enumByOrdinal": 2 + } + """; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName1 = lexer.readString(); + assertEquals("enumByName", fieldName1); + lexer.consumeColon(); + TestEnum enumValue1 = lexer.readEnum(TestEnum.class); + assertEquals(TestEnum.SECOND, enumValue1); + lexer.consumeComma(); + + String fieldName2 = lexer.readString(); + assertEquals("enumByOrdinal", fieldName2); + lexer.consumeColon(); + TestEnum enumValue2 = lexer.readEnum(TestEnum.class); + assertEquals(TestEnum.THIRD, enumValue2); + + lexer.closeObject(); + } + + @Test + void arrayTest() throws Exception { + String json = """ + { + "array": [1, 2, 3] + } + """; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + String fieldName = lexer.readString(); + assertEquals("array", fieldName); + lexer.consumeColon(); + + lexer.openArray(); + + long value1 = lexer.readSignedInteger(); + assertEquals(1, value1); + assertTrue(lexer.nextFieldOrClose()); + + long value2 = lexer.readSignedInteger(); + assertEquals(2, value2); + assertTrue(lexer.nextFieldOrClose()); + + long value3 = lexer.readSignedInteger(); + assertEquals(3, value3); + assertFalse(lexer.nextFieldOrClose()); + + lexer.closeObject(); + } + + @Test + void nullTest() throws Exception { + String json = """ + { + "nullValue": null + } + """; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + String fieldName = lexer.readString(); + assertEquals("nullValue", fieldName); + lexer.consumeColon(); + + String nullValue = lexer.readString(); + assertNull(nullValue); + + lexer.closeObject(); + } + + @Test + void 
whitespaceTest() throws Exception { + String json = " { \n" + + " \"value\" : 42 \n" + + " } \n"; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName = lexer.readString(); + assertEquals("value", fieldName); + lexer.consumeColon(); + + long value = lexer.readSignedInteger(); + assertEquals(42, value); + + lexer.closeObject(); + } + + @Test + void nestedObjectTest() throws Exception { + String json = """ + { + "outer": { + "inner": "value" + } + } + """; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName = lexer.readString(); + assertEquals("outer", fieldName); + lexer.consumeColon(); + + lexer.openObject(); + String innerFieldName = lexer.readString(); + assertEquals("inner", innerFieldName); + lexer.consumeColon(); + + String value = lexer.readString(); + assertEquals("value", value); + + lexer.closeObject(); + lexer.closeObject(); + } + + @Test + void escapeSequencesTest() throws Exception { + String json = """ + { + "escaped": "\\"\\\\/\\b\\f\\n\\r\\t\\u0041" + } + """; + + JsonLexer lexer = new JsonLexer(readableSequentialData(json)); + lexer.openObject(); + + String fieldName = lexer.readString(); + assertEquals("escaped", fieldName); + lexer.consumeColon(); + + String value = lexer.readString(); + assertEquals("\"\\/\b\f\n\r\tA", value); + + lexer.closeObject(); + } + + @Test + void parseExceptionTest() { + String invalidJson = "{\"field\": invalid}"; + + JsonLexer lexer = new JsonLexer(readableSequentialData(invalidJson)); + + Exception exception = assertThrows(ParseException.class, () -> { + lexer.openObject(); + lexer.readString(); + lexer.consumeColon(); + lexer.readString(); // This should throw because "invalid" is not a valid JSON string + }); + + assertTrue(exception.getMessage().contains("Expected")); + } + + private ReadableSequentialData readableSequentialData(String json) { + return BufferedData.wrap(json.getBytes(StandardCharsets.UTF_8)); + } +} diff --git a/pbj-integration-tests/build.gradle.kts b/pbj-integration-tests/build.gradle.kts index bf38465d..413e254d 100644 --- a/pbj-integration-tests/build.gradle.kts +++ b/pbj-integration-tests/build.gradle.kts @@ -59,7 +59,15 @@ dependencies { testImplementation("com.hedera.pbj:pbj-compiler") { isTransitive dependencyAnalysis { issues { all { onAny { exclude("com.hedera.pbj:pbj-compiler") } } } } // IMPROVE: JMH code should not depend on test code -jmh { includeTests = true } +jmh { + includeTests = true +// includes.add("com.hedera.pbj.integration.jmh.JsonBench*") + includes.add("com.hedera.pbj.integration.jmh.JsonBench.AccountDetailsBench.writePbj") +// includes.add("com.hedera.pbj.integration.jmh.JsonBench.AccountDetailsBench.writePbjStreaming") +// includes.add("com.hedera.pbj.integration.jmh.BufferedDataBench") +// includes.add("com.hedera.pbj.integration.jmh.ProtobufObjectBench*") + profilers.add("async:libPath=/Users/jasperpotts/code/async-profiler-4.0-macos/lib/libasyncProfiler.dylib;output=jfr") +} // Avoid a clash with Google protoc models when .proto files don't specify `pbj.java_package`: pbj { javaPackageSuffix = ".pbj.integration.tests" } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/BufferedDataBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/BufferedDataBench.java new file mode 100644 index 00000000..e83161b3 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/BufferedDataBench.java @@ -0,0 +1,71 @@ 
+package com.hedera.pbj.integration.jmh; + +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +@SuppressWarnings("unused") +@Fork(1) +@Warmup(iterations = 5, time = 2) +@Measurement(iterations = 10, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class BufferedDataBench { + public static final byte[] TEST_BYTES = "fooBar".getBytes(); + + @SuppressWarnings("rawtypes") + @State(Scope.Benchmark) + public static class BufferedDataBenchState { + private BufferedData smallBuffer; + private BufferedData offsetBuffer; + + public BufferedDataBenchState() { + } + + @Setup(Level.Invocation) + public void setup() { + smallBuffer = BufferedData.wrap(new byte[1024]); + offsetBuffer = BufferedData.wrap(new byte[1024*4],1024, 2048); + } + + @TearDown(Level.Invocation) + public void tearDown() { + smallBuffer = null; + offsetBuffer = null; + } + } + + + @Benchmark + public void writeByteSmallBuffer(BufferedDataBenchState benchmarkState, Blackhole blackhole) { + benchmarkState.smallBuffer.writeByte((byte) 100); + } + + @Benchmark + public void writeByteOffsetBuffer(BufferedDataBenchState benchmarkState, Blackhole blackhole) { + benchmarkState.offsetBuffer.writeByte((byte) 100); + } + + @Benchmark + public void writeBytesSmallBuffer(BufferedDataBenchState benchmarkState, Blackhole blackhole) { + benchmarkState.smallBuffer.writeBytes(TEST_BYTES); + } + + @Benchmark + public void writeBytesOffsetBuffer(BufferedDataBenchState benchmarkState, Blackhole blackhole) { + benchmarkState.offsetBuffer.writeBytes(TEST_BYTES); + } + +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/JsonBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/JsonBench.java index 397a27e4..36704d6d 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/JsonBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/JsonBench.java @@ -12,8 +12,10 @@ import com.hedera.pbj.runtime.JsonCodec; import com.hedera.pbj.runtime.ParseException; import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.stream.WritableStreamingData; import com.hedera.pbj.test.proto.pbj.Everything; import com.hederahashgraph.api.proto.java.GetAccountDetailsResponse; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; @@ -31,8 +33,8 @@ @SuppressWarnings("unused") @Fork(1) -@Warmup(iterations = 2, time = 2) -@Measurement(iterations = 5, time = 2) +@Warmup(iterations = 5, time = 2) +@Measurement(iterations = 10, time = 2) @OutputTimeUnit(TimeUnit.NANOSECONDS) @BenchmarkMode(Mode.AverageTime) public abstract class JsonBench { @@ -52,6 +54,9 @@ public static class JsonBenchmarkState { // output buffers private BufferedData outDataBuffer; + private ByteArrayOutputStream outOutputStream; + private 
WritableStreamingData outStreamingData; + public void configure( P pbjModelObject, @@ -81,7 +86,9 @@ public void configure( // input buffers // output buffers - this.outDataBuffer = BufferedData.allocate(jsonString.length()); + this.outDataBuffer = BufferedData.allocate(jsonString.length()*2); + this.outOutputStream = new ByteArrayOutputStream(jsonString.length()*2); + this.outStreamingData = new WritableStreamingData(outOutputStream); } catch (IOException e) { e.getStackTrace(); System.err.flush(); @@ -112,6 +119,13 @@ public void writePbj(JsonBenchmarkState benchmarkState, Blackhole blackhol blackhole.consume(benchmarkState.outDataBuffer); } + @Benchmark + public void writePbjStreaming(JsonBenchmarkState benchmarkState, Blackhole blackhole) throws IOException { + benchmarkState.outOutputStream.reset(); + benchmarkState.pbjJsonCodec.write(benchmarkState.pbjModelObject, benchmarkState.outStreamingData); + blackhole.consume(benchmarkState.outOutputStream.toByteArray()); + } + @Benchmark public void writeProtoC(JsonBenchmarkState benchmarkState, Blackhole blackhole) throws InvalidProtocolBufferException { From 8bd65aab12c5be490beeb86124a975663d4cb70f Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Wed, 28 May 2025 12:46:11 -0700 Subject: [PATCH 2/5] Bunch more performance improvements Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../impl/generators/TestGenerator.java | 28 +- .../generators/json/JsonCodecGenerator.java | 74 +++++ .../json/JsonCodecWriteMethodGenerator.java | 11 +- .../com/hedera/pbj/runtime/JsonTools.java | 264 +++++++++--------- .../hedera/pbj/runtime/ProtoTestTools.java | 19 +- .../runtime/io/WritableSequentialData.java | 38 ++- .../io/buffer/ByteArrayBufferedData.java | 142 ++++++++-- .../hedera/pbj/runtime/io/buffer/Bytes.java | 40 +++ .../hedera/pbj/runtime/json/JsonLexer.java | 12 +- .../pbj/runtime/io/WritableTestBase.java | 23 ++ 10 files changed, 459 insertions(+), 192 deletions(-) diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java index 16ad7775..babb264f 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/TestGenerator.java @@ -318,7 +318,7 @@ private static String generateTestMethod(final String modelClassName, final Stri final var dataBuffer2 = getThreadLocalDataBuffer2(); final var byteBuffer = getThreadLocalByteBuffer(); final var charBuffer = getThreadLocalCharBuffer(); - final var charBuffer2 = getThreadLocalCharBuffer2(); + final var dataBufferLarge = getThreadLocalBigBuffer(); // model to bytes with PBJ $modelClassName.PROTOBUF.write(modelObj, dataBuffer); @@ -377,21 +377,25 @@ private static String generateTestMethod(final String modelClassName, final Stri dataBuffer3.getBytes(0, readBytes); assertArrayEquals(bytes.toByteArray(), readBytes); - // Test JSON Writing - final CharBufferToWritableSequentialData charBufferToWritableSequentialData = new CharBufferToWritableSequentialData(charBuffer); - $modelClassName.JSON.write(modelObj,charBufferToWritableSequentialData); + // Write JSON with PBJ + $modelClassName.JSON.write(modelObj,dataBufferLarge); + dataBufferLarge.flip(); + final byte[] pbjJsonBytes = new byte[(int)dataBufferLarge.length()]; + dataBufferLarge.getBytes(0, pbjJsonBytes); + 
final String pbjJsonString = new String(pbjJsonBytes, StandardCharsets.UTF_8); + // Write JSON with ProtoC + JsonFormat.printer().appendTo(protoCModelObj, charBuffer); charBuffer.flip(); - JsonFormat.printer().appendTo(protoCModelObj, charBuffer2); - charBuffer2.flip(); - assertEquals(charBuffer2, charBuffer); + final String jsonString = charBuffer.toString(); + final byte[] protoCJsonBytes = jsonString.getBytes(StandardCharsets.UTF_8); + // compare JSON string then bytes + assertEquals(jsonString, pbjJsonString); + assertArrayEquals(protoCJsonBytes, pbjJsonBytes); // Test JSON Reading - String jsonString = charBuffer.toString(); - System.out.println("jsonString: " + jsonString); - System.out.flush(); try{ - final $modelClassName jsonReadPbj = $modelClassName.JSON.parse(BufferedData.wrap(jsonString.getBytes( - StandardCharsets.UTF_8)), false, Integer.MAX_VALUE); + final $modelClassName jsonReadPbj = $modelClassName.JSON.parse(BufferedData.wrap(protoCJsonBytes + ), false, Integer.MAX_VALUE); assertEquals(modelObj, jsonReadPbj); } catch (Exception e) { System.err.println("JSON read: " + jsonString); diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java index 15518651..3369f559 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java @@ -11,6 +11,7 @@ import com.hedera.pbj.compiler.impl.generators.Generator; import com.hedera.pbj.compiler.impl.grammar.Protobuf3Parser; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; @@ -78,6 +79,7 @@ public void generate( * JSON Codec for $modelClass model object. Generated based on protobuf schema. */ public final$staticModifier class $codecClass implements JsonCodec<$modelClass> { + $fieldNameConstants /** * Empty constructor @@ -91,6 +93,7 @@ public void generate( $writeMethod """ + .replace("$fieldNameConstants", generateFieldNameConstants(fields)) .replace("$modelClass", modelClassName) .replace("$staticModifier", staticModifier) .replace("$codecClass", codecClassName) @@ -109,6 +112,77 @@ public void generate( writer.append("}"); } + /** + * Generates the field name constants for the fields in the message. The field names are converted to JSON field + * names then UTF-8 encoded as byte arrays. The byte arrays are then stored as static final fields in the generated + * JSON codec class. This is done to avoid all the conversion logic at runtime. 
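+ * <p>For illustration, a proto field named {@code foo_bar} maps to the JSON name {@code fooBar}, so the
+ * generated constant would look roughly like this (hypothetical output; the byte values are the UTF-8 encoding):
+ * <pre>{@code
+ * private static final byte[] FIELD_NAME_FOO_BAR = new byte[] {0x66, 0x6F, 0x6F, 0x42, 0x61, 0x72};
+ * }</pre>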
+ * + * @param fields the list of fields in the message + * @return a CharSequence containing the field name constants + */ + private CharSequence generateFieldNameConstants(List fields) { + final StringBuilder sb = new StringBuilder(); + for (final var field : fields) { + // check if field is oneof + if (field instanceof OneOfField) { + // handle one of child fields + final OneOfField oneOfField = (OneOfField) field; + for (final var childField : oneOfField.fields()) { + sb.append(" private static final byte[] "); + sb.append(getFieldNameConstantName(childField)); + sb.append(" = new byte[] {"); + byte[] fieldNameBytes = toJsonFieldName(childField.name()).getBytes(StandardCharsets.UTF_8); + for (int i = 0; i < fieldNameBytes.length; i++) { + int b = fieldNameBytes[i] & 0xFF; + if (fieldNameBytes[i] < 0) { + sb.append("(byte)"); + } + sb.append(String.format("0x%02X", b)); + if (i < fieldNameBytes.length - 1) { + sb.append(", "); + } + } + sb.append("};\n"); + } + } else { + sb.append(" private static final byte[] "); + sb.append(getFieldNameConstantName(field)); + sb.append(" = new byte[] {"); + byte[] fieldNameBytes = toJsonFieldName(field.name()).getBytes(StandardCharsets.UTF_8); + for (int i = 0; i < fieldNameBytes.length; i++) { + int b = fieldNameBytes[i] & 0xFF; + if (fieldNameBytes[i] < 0) { + sb.append("(byte)"); + } + sb.append(String.format("0x%02X", b)); + if (i < fieldNameBytes.length - 1) { + sb.append(", "); + } + } + sb.append("};\n"); + } + } + // remove the first line indent + if (!sb.isEmpty()) { + sb.delete(0, 4); + } + return sb; + } + + /** + * Generates the constant name for a field name. + * + * @param field the field + * @return the constant name for the field name + */ + static String getFieldNameConstantName(Field field) { + // check if the field name is not snake case but camel case or pascal case, if so convert it to snake case + final String fieldName = field.name().replaceAll("([a-z])([A-Z])", "$1_$2") + .replaceAll("([A-Z])([A-Z][a-z])", "$1_$2") + .toUpperCase(); + return "FIELD_NAME_" + fieldName; + } + /** * Converts a field name to a JSON field name. 
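+ * For example, {@code foo_bar} becomes {@code fooBar}, matching protoc's lowerCamelCase JSON field naming.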
* diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java index 62a386fc..2e459cbf 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java @@ -2,7 +2,7 @@ package com.hedera.pbj.compiler.impl.generators.json; import static com.hedera.pbj.compiler.impl.Common.DEFAULT_INDENT; -import static com.hedera.pbj.compiler.impl.generators.json.JsonCodecGenerator.toJsonFieldName; +import static com.hedera.pbj.compiler.impl.generators.json.JsonCodecGenerator.getFieldNameConstantName; import com.hedera.pbj.compiler.impl.Common; import com.hedera.pbj.compiler.impl.Field; @@ -47,8 +47,7 @@ public void write(@NonNull $modelClass data, @NonNull WritableSequentialData out if (inline) { out.writeByte(OPEN_OBJECT); } else { - out.writeBytes(indentBytes); - out.writeByte2(OPEN_OBJECT, NL); + out.writeByte(OPEN_OBJECT); } // write field lines boolean isFirstField = true; @@ -78,8 +77,8 @@ public void write(@NonNull $modelClass data, @NonNull WritableSequentialData out */ private static String generateFieldWriteLines(final Field field, final String modelClassName, String getValueCode) { final String fieldDef = Common.camelToUpperSnake(field.name()); - final String fieldName = '\"' + toJsonFieldName(field.name()) + '\"'; - final String basicFieldCode = generateBasicFieldLines(field, getValueCode, fieldDef, fieldName, "initialIndent+indentStep+indentStep"); // todo replace indent*2 with childIndentBytes + final String fieldName = getFieldNameConstantName(field); + final String basicFieldCode = generateBasicFieldLines(field, getValueCode, fieldDef, fieldName, "initialIndent+indentStep"); // todo replace indent*2 with childIndentBytes StringBuilder sb = new StringBuilder(); sb.append("// [" + field.fieldNumber() + "] - " + field.name() + "\n"); @@ -103,7 +102,7 @@ private static String generateFieldWriteLines(final Field field, final String mo + ") {\n"); } } - sb.append(" if (isFirstField) { isFirstField = false; } else { out.writeByte2(COMMA, NL); }\n"); + sb.append(" if (isFirstField) { isFirstField = false; if(!inline) out.writeByte(NL); } else { out.writeByte2(COMMA, NL); }\n"); sb.append(" out.writeBytes(childIndentBytes);\n"); sb.append(" "+ basicFieldCode + ";\n"); sb.append("}"); diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java index 33917249..c6f3b57f 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java @@ -5,6 +5,7 @@ import com.hedera.pbj.runtime.io.buffer.Bytes; import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; +import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.List; import java.util.Map; @@ -13,54 +14,8 @@ * Class of static utility methods for working with JSON. All generated JSON is designed to be * 100% identical to that generated by Google Protobuf library. 
*/ -@SuppressWarnings("ForLoopReplaceableByForEach") +@SuppressWarnings({"ForLoopReplaceableByForEach", "JavaExistingMethodCanBeUsed"}) public final class JsonTools { - - // ==================================================================================================== - // Conversion Utility Methods - - /** - * Convert a protobuf field name to a protobuf spec json field name. This is based directly on the code - * from protobuf library so that it matches exactly. - * - * @param fieldName the protobuf field name to convert - * @return the protobuf spec json field name - */ - public static String toJsonFieldName(String fieldName) { - // based directly on protoc so output matches - final int length = fieldName.length(); - StringBuilder result = new StringBuilder(length); - boolean isNextUpperCase = false; - for (int i = 0; i < length; i++) { - char ch = fieldName.charAt(i); - if (ch == '_') { - isNextUpperCase = true; - } else if (isNextUpperCase) { - // This closely matches the logic for ASCII characters in: - // http://google3/google/protobuf/descriptor.cc?l=249-251&rcl=228891689 - if ('a' <= ch && ch <= 'z') { - ch = (char) (ch - 'a' + 'A'); - } - result.append(ch); - isNextUpperCase = false; - } else { - result.append(ch); - } - } - return result.toString(); - } - - /** - * Unescape a string that was escaped by replacing new lines with \n or \r. - * - * @param string the string with escapes to unescape - * @return the unescaped string, null if input string is null - */ - public static String unescape(@Nullable String string) { - if (string == null) return null; - return string.replaceAll("\\\\n", "\n").replaceAll("\\\\r", "\r"); - } - public static final byte OPEN_OBJECT = '{'; public static final byte CLOSE_OBJECT = '}'; public static final byte OPEN_ARRAY = '['; @@ -69,6 +24,7 @@ public static String unescape(@Nullable String string) { public static final byte SPACE = ' '; public static final byte COLON = ':'; public static final byte COMMA = ','; + public static final byte QUOTE = '"'; public static final byte n = 'n'; public static final byte u = 'u'; public static final byte l = 'l'; @@ -79,14 +35,15 @@ public static String unescape(@Nullable String string) { /** * Object field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, int indent, String fieldName, JsonCodec codec, @Nullable final T value) { - out.writeJsonString(toJsonFieldName(fieldName), true); - out.writeByte2(COLON, SPACE); + public static void field(@NonNull WritableSequentialData out, int indent, byte[] fieldNameBytes, JsonCodec codec, @Nullable final T value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); if (value != null) { - codec.write(value, out, 0, 2, true); // TODO replace indent.length with indent + codec.write(value, out, indent, 2, false); // TODO replace indent.length with indent } else { out.writeByte4(n,u,l,l); } @@ -95,71 +52,82 @@ public static void field(@NonNull WritableSequentialData out, int indent, St /** * String field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, String value) { - out.writeJsonString(fieldName, true); - out.writeByte2(COLON, SPACE); 
+ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, String value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value, true); } /** * String field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void fieldUnquoted(@NonNull WritableSequentialData out, String fieldName, String value) { - out.writeJsonString(fieldName, true); - out.writeByte2(COLON, SPACE); + public static void fieldUnquoted(@NonNull WritableSequentialData out, byte[] fieldNameBytes, String value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value, false); } /** * String field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes */ - public static void fieldNull(@NonNull WritableSequentialData out, String fieldName) { - out.writeJsonString(fieldName, true); - out.writeByte2(COLON, SPACE); + public static void fieldNull(@NonNull WritableSequentialData out, byte[] fieldNameBytes) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); out.writeByte4(n,u,l,l); } /** * Bytes field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Bytes value) { - String rawValue = value.toBase64(); - field(out, fieldName, rawValue); + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Bytes value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte4(QUOTE, COLON, SPACE,QUOTE); + out.writeBase64(value); + out.writeByte(QUOTE); } /** * Byte array field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, byte[] value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, byte[] value) { String rawValue = Base64.getEncoder().encodeToString(value); - field(out, fieldName, rawValue); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(rawValue, true); } /** * Map field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field * @param vComposer a composer of a "key":value strings - basically, a JsonTools::field method for the value type */ public static void field(@NonNull WritableSequentialData out, - String fieldName, Map value, FieldFunction vComposer) { + byte[] fieldNameBytes, Map value, FieldFunction vComposer) { assert !value.isEmpty(); - out.writeJsonString(fieldName, true); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte(QUOTE); out.writeByte4(COLON, SPACE, OPEN_OBJECT, NL); PbjMap pbjMap = (PbjMap) value; for (int i = 0; i < pbjMap.size(); i++) { @@ -170,163 +138,198 @@ public static void field(@NonNull WritableSequentialData out, V v = pbjMap.get(k); // String keyStr = kEncoder.apply(k); // TODO: implement a 
key encoder - vComposer.write(out, k.toString(), v); // TODO pass in indent + vComposer.write(out, k.toString().getBytes(StandardCharsets.UTF_8), v); // TODO pass in indent } - out.writeByte(CLOSE_OBJECT); + out.writeByte2(NL, CLOSE_OBJECT); } public interface FieldFunction { - void write(@NonNull WritableSequentialData out, String fieldName, T value); + void write(@NonNull WritableSequentialData out, byte[] fieldNameBytes, T value); } /** * Primitive boolean field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, boolean value) { - fieldUnquoted(out, fieldName, value ? "true" : "false"); + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, boolean value) { + fieldUnquoted(out, fieldNameBytes, value ? "true" : "false"); } /** * Primitive int field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, int value) { - fieldUnquoted(out, fieldName, Integer.toString(value)); + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, int value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonLong(value, false); } /** * Primitive long field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, long value) { - field(out, fieldName, Long.toString(value)); + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, long value) { + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonLong(value, true); } /** * Primitive float field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, float value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, float value) { if (Float.isNaN(value)) { - field(out, fieldName, "NaN"); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString("NaN", true); } else if (Float.isInfinite(value)) { - field(out, fieldName, value < 0 ? "-Infinity" : "Infinity"); + String value1 = value < 0 ? 
"-Infinity" : "Infinity"; + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(value1, true); } else { - field(out, fieldName, Float.toString(value)); + String value1 = Float.toString(value); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(value1, false); } } /** * Primitive double field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, double value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, double value) { if (Double.isNaN(value)) { - field(out, fieldName, "NaN"); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString("NaN", true); } else if (Double.isInfinite(value)) { - field(out, fieldName, value < 0 ? "-Infinity" : "Infinity"); + String value1 = value < 0 ? "-Infinity" : "Infinity"; + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(value1, true); } else { - field(out, fieldName, Double.toString(value)); + String value1 = Double.toString(value); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(value1, false); } } /** * Boxed Boolean field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Boolean value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Boolean value) { if (value == null) { - fieldNull(out, fieldName); + fieldNull(out, fieldNameBytes); } else { - fieldUnquoted(out, fieldName, value.toString()); + fieldUnquoted(out, fieldNameBytes, value.toString()); } } /** * Boxed Integer field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Integer value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Integer value) { if (value == null) { - fieldNull(out, fieldName); + fieldNull(out, fieldNameBytes); } else { - field(out, fieldName, value.intValue()); + field(out, fieldNameBytes, value.intValue()); } } /** * Boxed Long field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Long value, boolean quote) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Long value, boolean quote) { if (value == null) { - fieldNull(out, fieldName); + fieldNull(out, fieldNameBytes); } else if (quote) { - field(out, fieldName, Long.toString(value)); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte3(QUOTE, COLON, SPACE); + out.writeJsonString(Long.toString(value), true); } else { - fieldUnquoted(out, fieldName, Long.toString(value)); + fieldUnquoted(out, fieldNameBytes, 
Long.toString(value)); } } /** * Boxed Float field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Float value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Float value) { if (value == null) { - fieldNull(out, fieldName); + fieldNull(out, fieldNameBytes); } else { - field(out, fieldName, value.floatValue()); + field(out, fieldNameBytes, value.floatValue()); } } /** * Boxed Double field to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param value the value of the field */ - public static void field(@NonNull WritableSequentialData out, String fieldName, Double value) { + public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Double value) { if (value == null) { - fieldNull(out, fieldName); + fieldNull(out, fieldNameBytes); } else { - field(out, fieldName, value.doubleValue()); + field(out, fieldNameBytes, value.doubleValue()); } } /** * Array field of primitives to JSON string * - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param items the items in the array * @param the type of the items in the array */ - public static void arrayField(@NonNull WritableSequentialData out, String fieldName, FieldDefinition fieldDefinition, List items) { + public static void arrayField(@NonNull WritableSequentialData out, byte[] fieldNameBytes, FieldDefinition fieldDefinition, List items) { if (items != null) { if (items.isEmpty()) { - out.writeJsonString(fieldName, true); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte(QUOTE); out.writeByte4(COLON, SPACE, OPEN_ARRAY, CLOSE_ARRAY); } else { - out.writeJsonString(fieldName, true); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte(QUOTE); out.writeByte3(COLON, SPACE, OPEN_ARRAY); boolean isFirst = true; for (int i = 0; i < items.size(); i++) { @@ -341,7 +344,11 @@ public static void arrayField(@NonNull WritableSequentialData out, String fi } else { switch (fieldDefinition.type()) { case STRING -> out.writeJsonString((String) item, true); - case BYTES -> out.writeJsonString(((Bytes) item).toBase64(), true); + case BYTES -> { + out.writeByte(QUOTE); + out.writeBase64((Bytes) item); + out.writeByte(QUOTE); + } case INT32, SINT32, UINT32, FIXED32, SFIXED32 -> out.writeJsonString(Integer.toString((Integer) item), false); case INT64, SINT64, UINT64, FIXED64, SFIXED64 -> @@ -365,21 +372,22 @@ public static void arrayField(@NonNull WritableSequentialData out, String fi * Array field of objects to JSON string * * @param indent the indent to use for generated JSON - * @param fieldName the name of the field + * @param fieldNameBytes the name of the field already encoded as bytes * @param codec the codec to use for encoding the items * @param items the items in the array * @param the type of the items in the array */ - public static void arrayField(@NonNull WritableSequentialData out, int indent, String fieldName, JsonCodec codec, List items) { + public static void arrayField(@NonNull WritableSequentialData out, int indent, byte[] fieldNameBytes, JsonCodec codec, List items) { if (items != null) { if (items.isEmpty()) { - fieldUnquoted(out, fieldName, "[]"); + fieldUnquoted(out, 
fieldNameBytes, "[]"); } else { - out.writeJsonString(toJsonFieldName(fieldName), true); - out.writeByte3(COLON, SPACE, OPEN_ARRAY); + out.writeByte(QUOTE); + out.writeBytes(fieldNameBytes); + out.writeByte4(QUOTE, COLON, SPACE, OPEN_ARRAY); for (int i = 0; i < items.size(); i++) { var item = items.get(i); - codec.write(item, out, indent, 2, true); // TODO replace 0, 2 with indent + codec.write(item, out, indent, 2, false); if (i < items.size() - 1) { out.writeByte2(COMMA, SPACE); } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoTestTools.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoTestTools.java index 1d37cd55..4ba806fd 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoTestTools.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/ProtoTestTools.java @@ -21,6 +21,9 @@ public final class ProtoTestTools { /** Size for reusable test buffers */ private static final int BUFFER_SIZE = 1024 * 1024; + /** Size for reusable large test buffers */ + private static final int LARGE_BUFFER_SIZE = 16 * 1024 * 1024; + /** Size for reusable test char buffers */ private static final int CHAR_BUFFER_SIZE = 8 * 1024 * 1024; @@ -42,9 +45,9 @@ private ProtoTestTools() {} private static final ThreadLocal THREAD_LOCAL_CHAR_BUFFERS = ThreadLocal.withInitial(() -> CharBuffer.allocate(CHAR_BUFFER_SIZE)); - /** Thread local set of reusable char buffers */ - private static final ThreadLocal THREAD_LOCAL_CHAR_BUFFERS_2 = - ThreadLocal.withInitial(() -> CharBuffer.allocate(CHAR_BUFFER_SIZE)); + /** Thread local set of reusable large buffers */ + private static final ThreadLocal THREAD_LOCAL_BIG_BUFFERS = + ThreadLocal.withInitial(() -> BufferedData.allocate(LARGE_BUFFER_SIZE)); /** * Get the thread local instance of DataBuffer, reset and ready to use. @@ -91,13 +94,13 @@ public static CharBuffer getThreadLocalCharBuffer() { } /** - * Get the thread local instance of CharBuffer, reset and ready to use. + * Get the thread local large instance of BufferedData, reset and ready to use. 
* - * @return a ByteBuffer that can be reused by current thread + * @return a BufferedData that can be reused by current thread */ - public static CharBuffer getThreadLocalCharBuffer2() { - final var local = THREAD_LOCAL_CHAR_BUFFERS_2.get(); - local.clear(); + public static BufferedData getThreadLocalBigBuffer() { + final var local = THREAD_LOCAL_BIG_BUFFERS.get(); + local.reset(); return local; } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java index 103d02da..0a3da1bb 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/WritableSequentialData.java @@ -2,6 +2,7 @@ package com.hedera.pbj.runtime.io; import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; import com.hedera.pbj.runtime.io.buffer.RandomAccessData; import edu.umd.cs.findbugs.annotations.NonNull; import java.io.IOException; @@ -11,6 +12,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Objects; /** @@ -235,17 +237,39 @@ default void writeUTF8(@NonNull final String value) { */ default void writeJsonString(@NonNull final String value, boolean quoted) { String escaped = value - .replaceAll("\\\\", "\\\\") - .replaceAll("\"", "\\\"") - .replaceAll("\n", "\\n") - .replaceAll("\r", "\\r") - .replaceAll("\t", "\\t") - .replaceAll("\f", "\\f") - .replaceAll("\b", "\\b"); + .replaceAll("\"", "\\\\\"") + .replaceAll("\n", "\\\\n") + .replaceAll("\r", "\\\\r") + .replaceAll("\t", "\\\\t") + .replaceAll("\f", "\\\\f") + .replaceAll("\b", "\\\\b") + .replaceAll("\\\\", "\\\\"); if (quoted) escaped = '"' + escaped + '"'; writeBytes(escaped.getBytes(StandardCharsets.UTF_8)); } + /** + * Writes an int or long value as a JSON string, with quotes if requested. + * + * @param value The long value to write + * @param quoted If true, the value is written as a JSON string with quotes + */ + default void writeJsonLong(final long value, boolean quoted) { + if (quoted) writeByte((byte) '"'); + writeBytes(Long.toString(value).getBytes(StandardCharsets.UTF_8)); + if (quoted) writeByte((byte)'"'); + } + + /** + * Writes the given byte array as a Base64 encoded string to this {@link WritableSequentialData}. + * + * @param dataToEncode The byte array to encode in Base64 + * @throws UncheckedIOException if an I/O error occurs + */ + default void writeBase64(@NonNull final Bytes dataToEncode) { + writeBytes(Base64.getEncoder().encode(dataToEncode.toByteArray())); + } + /** * Writes four bytes containing the given int value, in the standard Java big-endian byte order, at the current * {@link #position()}, and then increments the {@link #position()} by four. 
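Illustrative sketch (not part of the applied diff): a minimal use of the two new WritableSequentialData defaults above, writeJsonLong and writeBase64, assuming only APIs that already appear in this patch (BufferedData.allocate, Bytes.wrap, writeByte) and imports of com.hedera.pbj.runtime.io.buffer.BufferedData and Bytes; expected output is noted in the comments:

    BufferedData out = BufferedData.allocate(64);
    out.writeJsonLong(123456789L, true);               // emits "123456789", quotes included
    out.writeByte((byte) ',');
    out.writeBase64(Bytes.wrap(new byte[] {1, 2, 3})); // emits AQID (Base64 of 0x010203), no quotes

The quoted form is what the JsonTools.field overload for long values builds on, per the diff above. One observation on the default writeJsonString: its backslash rule .replaceAll("\\\\", "\\\\") maps a backslash back to a single backslash, so literal backslashes appear to pass through unescaped; the array-backed override added below escapes them explicitly.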
diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java index 1d315d6a..e6582bd3 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/ByteArrayBufferedData.java @@ -388,48 +388,130 @@ public int writeBytes(@NonNull final InputStream src, final int maxLength) { public void writeJsonString(@NonNull String value, boolean quoted) { int offset = buffer.position(); final int len = value.length(); - validateCanWrite(len + 2); // TODO this is not really correct - if(quoted) array[offset++] = '"'; + validateCanWrite(len * 6L + 2); // TODO Worst-case scenario for UTF-8 encoding, is there better estimate? + if (quoted) array[arrayOffset + offset++] = '"'; for (int i = 0; i < len; i++) { char c = value.charAt(i); // Escape control chars and JSON specials switch (c) { - case '"': array[offset++] = '\\'; array[offset++] = '"'; continue; - case '\\': array[offset++] = '\\'; array[offset++] = '\\'; continue; - case '\b': array[offset++] = '\\'; array[offset++] = 'b'; continue; - case '\f': array[offset++] = '\\'; array[offset++] = 'f'; continue; - case '\n': array[offset++] = '\\'; array[offset++] = 'n'; continue; - case '\r': array[offset++] = '\\'; array[offset++] = 'r'; continue; - case '\t': array[offset++] = '\\'; array[offset++] = 't'; continue; + case '"': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = '"'; continue; + case '\\': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = '\\'; continue; + case '\b': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'b'; continue; + case '\f': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'f'; continue; + case '\n': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'n'; continue; + case '\r': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'r'; continue; + case '\t': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 't'; continue; } if (c < 0x20) { // Control character – use \ u00XX - array[offset++] = '\\'; - array[offset++] = 'u'; - array[offset++] = '0'; - array[offset++] = '0'; - array[offset++] = HEX[c >> 4]; - array[offset++] = HEX[c & 0xF]; - } else if (c < 0x80) { - array[offset++] = (byte) c; - } else if (c < 0x800) { - array[offset++] = (byte) (0b11000000 | (c >> 6)); - array[offset++] = (byte) (0b10000000 | (c & 0b00111111)); - } else if (Character.isSurrogate(c)) { - int cp = Character.toCodePoint(c, value.charAt(++i)); - array[offset++] = (byte) (0b11110000 | (cp >> 18)); - array[offset++] = (byte) (0b10000000 | ((cp >> 12) & 0b00111111)); - array[offset++] = (byte) (0b10000000 | ((cp >> 6) & 0b00111111)); - array[offset++] = (byte) (0b10000000 | (cp & 0b00111111)); + array[arrayOffset + offset++] = '\\'; + array[arrayOffset + offset++] = 'u'; + array[arrayOffset + offset++] = '0'; + array[arrayOffset + offset++] = '0'; + array[arrayOffset + offset++] = HEX[c >> 4]; + array[arrayOffset + offset++] = HEX[c & 0xF]; } else { - array[offset++] = (byte) (0b11100000 | (c >> 12)); - array[offset++] = (byte) (0b10000000 | ((c >> 6) & 0b00111111)); - array[offset++] = (byte) (0b10000000 | (c & 0b00111111)); + // Proper UTF-8 encoding for extended characters + offset = encodeUtf8(c, value, i, offset); } } - if(quoted) array[offset++] = 
'"'; + if (quoted) array[arrayOffset + offset++] = '"'; buffer.position(offset); } + + private int encodeUtf8(char c, String value, int i, int offset) { + if (c < 0x80) { + array[arrayOffset + offset++] = (byte) c; + } else if (c < 0x800) { + array[arrayOffset + offset++] = (byte) (0b11000000 | (c >> 6)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } else if (Character.isSurrogate(c)) { + int cp = Character.toCodePoint(c, value.charAt(++i)); + array[arrayOffset + offset++] = (byte) (0b11110000 | (cp >> 18)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((cp >> 12) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((cp >> 6) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (cp & 0b00111111)); + } else { + array[arrayOffset + offset++] = (byte) (0b11100000 | (c >> 12)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((c >> 6) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } + return offset; + } + + @Override + public void writeJsonLong(final long value, final boolean quoted) { + int offset = buffer.position(); + if (quoted) array[offset++] = '"'; + // Handle zero explicitly + if (value == 0) { + array[offset++] = '0'; + } else if (value == Long.MIN_VALUE) { + // Special case for Long.MIN_VALUE(-9223372036854775808) to avoid overflow + array[offset++] = '-'; + array[offset++] = '9'; + array[offset++] = '2'; + array[offset++] = '2'; + array[offset++] = '3'; + array[offset++] = '3'; + array[offset++] = '7'; + array[offset++] = '2'; + array[offset++] = '0'; + array[offset++] = '3'; + array[offset++] = '6'; + array[offset++] = '8'; + array[offset++] = '5'; + array[offset++] = '4'; + array[offset++] = '7'; + array[offset++] = '7'; + array[offset++] = '5'; + array[offset++] = '8'; + array[offset++] = '0'; + array[offset++] = '8'; + } else { + long v = value; + if (v < 0) { + array[offset++] = '-'; + v = -v; + } + // count the number of digits in the long value, assumes all values are positive + // Fast digit count calculation + final int digitCount = (v < 10L) ? 1 : + (v < 100L) ? 2 : + (v < 1000L) ? 3 : + (v < 10000L) ? 4 : + (v < 100000L) ? 5 : + (v < 1000000L) ? 6 : + (v < 10000000L) ? 7 : + (v < 100000000L) ? 8 : + (v < 1000000000L) ? 9 : + (v < 10000000000L) ? 10 : + (v < 100000000000L) ? 11 : + (v < 1000000000000L) ? 12 : + (v < 10000000000000L) ? 13 : + (v < 100000000000000L) ? 14 : + (v < 1000000000000000L) ? 15 : + (v < 10000000000000000L) ? 16 : + (v < 100000000000000000L) ? 17 : + (v < 1000000000000000000L) ? 
18 : 19; + // Now write them in reverse order + long tmp = v; + for (int i = digitCount-1; i >= 0; i--) { + array[offset+i] = (byte) ('0' + (tmp % 10)); + tmp /= 10; + } + offset += digitCount; + } + if (quoted) array[offset++] = '"'; + buffer.position(offset); + } + + @Override + public void writeBase64(@NonNull Bytes dataToEncode) { + final int offset = buffer.position(); + final int writenBytes = dataToEncode.writeBase64To(array, offset); + buffer.position(offset + writenBytes); + } } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index 81cf9167..e35d85cc 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -324,6 +324,46 @@ public void writeTo(@NonNull final MessageDigest digest, final int offset, final digest.update(buffer, Math.toIntExact(start + offset), length); } + + private static final char[] BASE64_ALPHABET = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); + private static final byte PADDING = '='; + + /** + * A helper method for efficient copy of our data into a Base64 encoded string. + * + * @param outputBuffer the byte array to write the Base64 encoded string into + * @param outputOffset the offset in the outputBuffer to start writing the Base64 encoded string + */ + public int writeBase64To(byte[] outputBuffer, int outputOffset) { + final int inputEnd = start + length; + int outputIndex = outputOffset; + // Process input in chunks of 3 bytes + for (int i = start; i + 2 < inputEnd; i += 3) { + int chunk = ((buffer[i] & 0xFF) << 16) | ((buffer[i + 1] & 0xFF) << 8) | (buffer[i + 2] & 0xFF); + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 18) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 12) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 6) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[chunk & 0x3F]; + } + // Handle remaining bytes + int remaining = inputEnd - start; + if (remaining == 1) { + int chunk = (buffer[start] & 0xFF) << 16; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 18) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 12) & 0x3F]; + outputBuffer[outputIndex++] = PADDING; + outputBuffer[outputIndex++] = PADDING; + } else if (remaining == 2) { + int chunk = ((buffer[start] & 0xFF) << 16) | ((buffer[start + 1] & 0xFF) << 8); + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 18) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 12) & 0x3F]; + outputBuffer[outputIndex++] = (byte) BASE64_ALPHABET[(chunk >> 6) & 0x3F]; + outputBuffer[outputIndex++] = PADDING; + } + return outputIndex - outputOffset; // Return the number of bytes written + } + /** * Same as {@link #updateSignature(Signature, int, int)} with offset 0 and length equal to the length of this * {@link Bytes} object. 
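Illustrative sketch (not part of the applied diff) cross-checking the hand-rolled Bytes.writeBase64To above against the JDK encoder; it assumes imports of com.hedera.pbj.runtime.io.buffer.Bytes, java.util.Base64 and java.nio.charset.StandardCharsets:

    byte[] raw = {(byte) 0xDE, (byte) 0xAD, (byte) 0xBE};            // exactly one 3-byte chunk, no padding
    byte[] dst = new byte[8];
    int n = Bytes.wrap(raw).writeBase64To(dst, 0);                   // n == 4
    String fast = new String(dst, 0, n, StandardCharsets.US_ASCII);  // "3q2+"
    String jdk = Base64.getEncoder().encodeToString(raw);            // "3q2+"
    assert fast.equals(jdk);

One case the sketch does not cover: remaining is computed as inputEnd - start (the total length), so the padding branches only fire for inputs of length 1 or 2; lengths such as 4 or 5 appear to drop their trailing bytes and are worth covering with a test.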
diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java index a2749840..68ca4f24 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java @@ -5,9 +5,12 @@ import com.hedera.pbj.runtime.io.buffer.Bytes; import com.hedera.pbj.runtime.io.buffer.RandomAccessData; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Map; +import java.util.Objects; import java.util.function.Consumer; import java.util.function.Supplier; +import java.util.stream.Collectors; /** * A simple JSON lexer that reads from a {@link ReadableSequentialData} and provides read and consume methods. It is @@ -503,7 +506,14 @@ private void checkClosingQuote(int c) { public > E readEnum(Class enumClass) throws ParseException { final int firstChar = readByte(); if (firstChar == QUOTE) { - return Enum.valueOf(enumClass, readJsonString()); + final String enumValue = readJsonString().toUpperCase(); + try { + return Enum.valueOf(enumClass, enumValue); + } catch (IllegalArgumentException e) { + throw new ParseException("Invalid enum value: \"" + enumValue + "\" in options: " + + Arrays.stream(enumClass.getEnumConstants()).map(en -> en.toString()) + .collect(Collectors.joining(", ")) + " becasue of\n" + e.getMessage()); + } } else { setNextCharRead(firstChar); long res; diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/WritableTestBase.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/WritableTestBase.java index c5c859dd..c6a647b1 100644 --- a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/WritableTestBase.java +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/io/WritableTestBase.java @@ -1363,4 +1363,27 @@ void writeZigZag() { assertThat(seq.position()).isEqualTo(pos + 2); } } + + @Nested + @DisplayName("write Json Tests") + final class WriteJsonTest { + @ParameterizedTest + @ValueSource(longs = {0,-1,1,123456789, -123456789L, Long.MAX_VALUE, Long.MIN_VALUE}) + @DisplayName("Writing a long in JSON format without quotes") + void writeJsonLongUnquoted(final long value) { + final var seq = sequence(); + seq.writeJsonLong(value, false); + assertThat(extractWrittenBytes(seq)).isEqualTo(Long.toString(value).getBytes(StandardCharsets.UTF_8)); + } + + @ParameterizedTest + @ValueSource(longs = {0,-1,1,123456789, -123456789L, Long.MAX_VALUE, Long.MIN_VALUE}) + @DisplayName("Writing a long in JSON format with quotes") + void writeJsonLongQuoted(final long value) { + final var seq = sequence(); + seq.writeJsonLong(value, true); + assertThat(extractWrittenBytes(seq)).isEqualTo( + ('"' + Long.toString(value) + '"').getBytes(StandardCharsets.UTF_8)); + } + } } From 115ddae0b386d167b3216606af312496ce90ccdc Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Wed, 28 May 2025 16:15:04 -0700 Subject: [PATCH 3/5] Added unsafe Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../generators/json/JsonCodecGenerator.java | 6 +- .../json/JsonCodecWriteMethodGenerator.java | 4 +- .../com/hedera/pbj/runtime/JsonTools.java | 112 ++-- .../buffer/UnsafeByteArrayBufferedData.java | 532 ++++++++++++++++++ 4 files changed, 584 insertions(+), 70 deletions(-) create mode 100644 
pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java index 3369f559..b15adae8 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecGenerator.java @@ -131,7 +131,8 @@ private CharSequence generateFieldNameConstants(List fields) { sb.append(" private static final byte[] "); sb.append(getFieldNameConstantName(childField)); sb.append(" = new byte[] {"); - byte[] fieldNameBytes = toJsonFieldName(childField.name()).getBytes(StandardCharsets.UTF_8); + final String filedNameText = '"' + toJsonFieldName(childField.name()) + "\": "; + final byte[] fieldNameBytes = filedNameText.getBytes(StandardCharsets.UTF_8); for (int i = 0; i < fieldNameBytes.length; i++) { int b = fieldNameBytes[i] & 0xFF; if (fieldNameBytes[i] < 0) { @@ -148,7 +149,8 @@ private CharSequence generateFieldNameConstants(List fields) { sb.append(" private static final byte[] "); sb.append(getFieldNameConstantName(field)); sb.append(" = new byte[] {"); - byte[] fieldNameBytes = toJsonFieldName(field.name()).getBytes(StandardCharsets.UTF_8); + final String filedNameText = '"' + toJsonFieldName(field.name()) + "\": "; + final byte[] fieldNameBytes = filedNameText.getBytes(StandardCharsets.UTF_8); for (int i = 0; i < fieldNameBytes.length; i++) { int b = fieldNameBytes[i] & 0xFF; if (fieldNameBytes[i] < 0) { diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java index 2e459cbf..fc1366d6 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/json/JsonCodecWriteMethodGenerator.java @@ -148,9 +148,9 @@ private static String generateBasicFieldLines( "v", Common.camelToUpperSnake(mapField.valueField().name()), "n", - childIndent); + childIndent+ "+indentStep"); // return "field(out, %s, %s, $kEncoder, $vComposer)" - return "field(out, %s, %s, $vComposer)" + return "field(out, initialIndent+indentStep+indentStep, %s, %s, $vComposer)" .formatted(fieldName, getValueCode) // Maps in protobuf can only have simple scalar and not floating keys, so toString should do a good // job. diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java index c6f3b57f..d43fbd50 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/JsonTools.java @@ -6,6 +6,7 @@ import edu.umd.cs.findbugs.annotations.NonNull; import edu.umd.cs.findbugs.annotations.Nullable; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Base64; import java.util.List; import java.util.Map; @@ -14,7 +15,7 @@ * Class of static utility methods for working with JSON. All generated JSON is designed to be * 100% identical to that generated by Google Protobuf library. 
*/ -@SuppressWarnings({"ForLoopReplaceableByForEach", "JavaExistingMethodCanBeUsed"}) +@SuppressWarnings({"ForLoopReplaceableByForEach"}) public final class JsonTools { public static final byte OPEN_OBJECT = '{'; public static final byte CLOSE_OBJECT = '}'; @@ -22,16 +23,29 @@ public final class JsonTools { public static final byte CLOSE_ARRAY = ']'; public static final byte NL = '\n'; public static final byte SPACE = ' '; - public static final byte COLON = ':'; public static final byte COMMA = ','; public static final byte QUOTE = '"'; + public static final byte MINUS = '-'; public static final byte n = 'n'; public static final byte u = 'u'; public static final byte l = 'l'; + public static final byte _0 = '0'; + private static final byte[] _TRUE = "true".getBytes(StandardCharsets.UTF_8); + private static final byte[] _FALSE = "false".getBytes(StandardCharsets.UTF_8); + private static final byte[] SPACES = new byte[128]; + static { + Arrays.fill(SPACES, SPACE); + } // ==================================================================================================== // To JSON String Methods + public static void writeIndent(@NonNull WritableSequentialData out, int indent) { + if (indent > 0) { + out.writeBytes(SPACES, 0, indent); + } + } + /** * Object field to JSON string * @@ -39,9 +53,7 @@ public final class JsonTools { * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, int indent, byte[] fieldNameBytes, JsonCodec codec, @Nullable final T value) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); if (value != null) { codec.write(value, out, indent, 2, false); // TODO replace indent.length with indent } else { @@ -56,24 +68,9 @@ public static void field(@NonNull WritableSequentialData out, int indent, by * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, String value) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value, true); } - - /** - * String field to JSON string - * - * @param fieldNameBytes the name of the field already encoded as bytes - * @param value the value of the field - */ - public static void fieldUnquoted(@NonNull WritableSequentialData out, byte[] fieldNameBytes, String value) { - out.writeByte(QUOTE); - out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); - out.writeJsonString(value, false); - } /** * String field to JSON string @@ -81,9 +78,7 @@ public static void fieldUnquoted(@NonNull WritableSequentialData out, byte[] fie * @param fieldNameBytes the name of the field already encoded as bytes */ public static void fieldNull(@NonNull WritableSequentialData out, byte[] fieldNameBytes) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeByte4(n,u,l,l); } @@ -94,9 +89,8 @@ public static void fieldNull(@NonNull WritableSequentialData out, byte[] fieldNa * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, Bytes value) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte4(QUOTE, COLON, SPACE,QUOTE); + out.writeByte(QUOTE); out.writeBase64(value); out.writeByte(QUOTE); } @@ -109,9 +103,7 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, byte[] value) 
{ String rawValue = Base64.getEncoder().encodeToString(value); - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(rawValue, true); } @@ -119,28 +111,37 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy * Map field to JSON string * * @param fieldNameBytes the name of the field already encoded as bytes + * @param indent the indent to use for generated JSON * @param value the value of the field * @param vComposer a composer of a "key":value strings - basically, a JsonTools::field method for the value type */ public static void field(@NonNull WritableSequentialData out, - byte[] fieldNameBytes, Map value, FieldFunction vComposer) { + int indent, byte[] fieldNameBytes, Map value, FieldFunction vComposer) { assert !value.isEmpty(); - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte(QUOTE); - out.writeByte4(COLON, SPACE, OPEN_OBJECT, NL); + out.writeByte2(OPEN_OBJECT, NL); PbjMap pbjMap = (PbjMap) value; for (int i = 0; i < pbjMap.size(); i++) { if (i > 0) { out.writeByte2(COMMA, NL); } + writeIndent(out, indent); K k = pbjMap.getSortedKeys().get(i); V v = pbjMap.get(k); - + final String kStr = '"' + k.toString() + .replaceAll("\"", "\\\\\"") + .replaceAll("\n", "\\\\n") + .replaceAll("\r", "\\\\r") + .replaceAll("\t", "\\\\t") + .replaceAll("\f", "\\\\f") + .replaceAll("\b", "\\\\b") + .replaceAll("\\\\", "\\\\") + "\": "; // String keyStr = kEncoder.apply(k); // TODO: implement a key encoder - vComposer.write(out, k.toString().getBytes(StandardCharsets.UTF_8), v); // TODO pass in indent + vComposer.write(out, kStr.getBytes(StandardCharsets.UTF_8), v); // TODO pass in indent } - out.writeByte2(NL, CLOSE_OBJECT); + out.writeByte(NL); + writeIndent(out, indent - 2); + out.writeByte(CLOSE_OBJECT); } public interface FieldFunction { @@ -154,7 +155,8 @@ public interface FieldFunction { * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, boolean value) { - fieldUnquoted(out, fieldNameBytes, value ? "true" : "false"); + out.writeBytes(fieldNameBytes); + out.writeBytes(value ? _TRUE : _FALSE); } /** @@ -164,9 +166,7 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, int value) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonLong(value, false); } @@ -177,9 +177,7 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy * @param value the value of the field */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, long value) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonLong(value, true); } @@ -191,21 +189,15 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, float value) { if (Float.isNaN(value)) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString("NaN", true); } else if (Float.isInfinite(value)) { String value1 = value < 0 ? 
"-Infinity" : "Infinity"; - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value1, true); } else { String value1 = Float.toString(value); - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value1, false); } } @@ -218,21 +210,15 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy */ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBytes, double value) { if (Double.isNaN(value)) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString("NaN", true); } else if (Double.isInfinite(value)) { String value1 = value < 0 ? "-Infinity" : "Infinity"; - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value1, true); } else { String value1 = Double.toString(value); - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(value1, false); } } @@ -247,7 +233,8 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy if (value == null) { fieldNull(out, fieldNameBytes); } else { - fieldUnquoted(out, fieldNameBytes, value.toString()); + out.writeBytes(fieldNameBytes); + out.writeBytes(value ? _TRUE : _FALSE); } } @@ -275,12 +262,10 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy if (value == null) { fieldNull(out, fieldNameBytes); } else if (quote) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte3(QUOTE, COLON, SPACE); out.writeJsonString(Long.toString(value), true); } else { - fieldUnquoted(out, fieldNameBytes, Long.toString(value)); + field(out, fieldNameBytes, Long.toString(value)); } } @@ -322,15 +307,11 @@ public static void field(@NonNull WritableSequentialData out, byte[] fieldNameBy public static void arrayField(@NonNull WritableSequentialData out, byte[] fieldNameBytes, FieldDefinition fieldDefinition, List items) { if (items != null) { if (items.isEmpty()) { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte(QUOTE); - out.writeByte4(COLON, SPACE, OPEN_ARRAY, CLOSE_ARRAY); + out.writeByte2(OPEN_ARRAY, CLOSE_ARRAY); } else { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte(QUOTE); - out.writeByte3(COLON, SPACE, OPEN_ARRAY); + out.writeByte(OPEN_ARRAY); boolean isFirst = true; for (int i = 0; i < items.size(); i++) { final T item = items.get(i); @@ -350,11 +331,11 @@ public static void arrayField(@NonNull WritableSequentialData out, byte[] fi out.writeByte(QUOTE); } case INT32, SINT32, UINT32, FIXED32, SFIXED32 -> - out.writeJsonString(Integer.toString((Integer) item), false); + out.writeJsonLong((Integer) item, false); case INT64, SINT64, UINT64, FIXED64, SFIXED64 -> - out.writeJsonString(Long.toString((Long) item), true); - case FLOAT -> out.writeJsonString(Float.toString((Float) item), true); - case DOUBLE -> out.writeJsonString(Double.toString((Double) item), true); + out.writeJsonLong((Long) item, true); + case FLOAT -> out.writeJsonString(Float.toString((Float) item), false); + case DOUBLE -> out.writeJsonString(Double.toString((Double) item), false); case BOOL -> out.writeJsonString(Boolean.toString((Boolean) item), false); case ENUM -> out.writeJsonString(((EnumWithProtoMetadata) item).protoName(), true); case MESSAGE -> throw new UnsupportedOperationException( @@ -380,11 +361,10 @@ public static 
void arrayField(@NonNull WritableSequentialData out, byte[] fi public static void arrayField(@NonNull WritableSequentialData out, int indent, byte[] fieldNameBytes, JsonCodec codec, List items) { if (items != null) { if (items.isEmpty()) { - fieldUnquoted(out, fieldNameBytes, "[]"); + field(out, fieldNameBytes, "[]"); } else { - out.writeByte(QUOTE); out.writeBytes(fieldNameBytes); - out.writeByte4(QUOTE, COLON, SPACE, OPEN_ARRAY); + out.writeByte(OPEN_ARRAY); for (int i = 0; i < items.size(); i++) { var item = items.get(i); codec.write(item, out, indent, 2, false); diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java new file mode 100644 index 00000000..20fb3a7b --- /dev/null +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.runtime.io.buffer; + +import static com.hedera.pbj.runtime.JsonTools.*; + +import com.hedera.pbj.runtime.io.DataEncodingException; +import com.hedera.pbj.runtime.io.UnsafeUtils; +import edu.umd.cs.findbugs.annotations.NonNull; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.lang.reflect.Field; +import java.nio.BufferUnderflowException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; +import sun.misc.Unsafe; + +/** + * BufferedData subclass for instances backed by a byte array. Provides slightly more optimized + * versions of several methods to get / read / write bytes using {@link System#arraycopy} and + * direct array reads / writes. + */ +final class UnsafeByteArrayBufferedData extends BufferedData { + /** Unsafe instance for direct memory access */ + private static final Unsafe UNSAFE; + /** Field offset of the byte[] class */ + private static final int BYTE_ARRAY_BASE_OFFSET; + /** Lookup table for hex digits */ + private static final byte[] HEX = "0123456789abcdef".getBytes(StandardCharsets.US_ASCII); + /** The byte representation of long minimum value */ + private static final byte[] MIN_LONG_VALUE = Long.toString(Long.MIN_VALUE).getBytes(StandardCharsets.US_ASCII); + + /* Get the Unsafe instance and the byte array base offset */ + static { + try { + final Field theUnsafeField = Unsafe.class.getDeclaredField("theUnsafe"); + theUnsafeField.setAccessible(true); + UNSAFE = (Unsafe) theUnsafeField.get(null); + BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); + } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) { + throw new InternalError(e); + } + } + + // Backing byte array + private final byte[] array; + + // This data buffer's offset into the backing array. 
See ByteBuffer.arrayOffset() for details + private final int arrayOffset; + + UnsafeByteArrayBufferedData(final ByteBuffer buffer) { + super(buffer); + if (!buffer.hasArray()) { + throw new IllegalArgumentException("Cannot create a ByteArrayBufferedData over a buffer with no array"); + } + this.array = buffer.array(); + this.arrayOffset = buffer.arrayOffset(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getSimpleName()); + sb.append("["); + for (int i = 0; i < buffer.limit(); i++) { + int v = array[arrayOffset + i] & 0xFF; + sb.append(v); + if (i < (buffer.limit() - 1)) { + sb.append(','); + } + } + sb.append(']'); + return sb.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean contains(final long offset, @NonNull final byte[] bytes) { + checkOffset(offset, length()); + + final int len = bytes.length; + if (length() - offset < len) { + return false; + } + + final int fromThisIndex = Math.toIntExact(arrayOffset + offset); + final int fromToIndex = fromThisIndex + len; + return Arrays.equals(array, fromThisIndex, fromToIndex, bytes, 0, len); + } + + /** + * {@inheritDoc} + */ + @Override + public byte getByte(final long offset) { + checkOffset(offset, length()); + return UNSAFE.getByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + offset); + } + + /** + * {@inheritDoc} + */ + @Override + public long getBytes(final long offset, @NonNull final byte[] dst, final int dstOffset, final int maxLength) { + validateLen(maxLength); + checkOffset(offset); + final long len = Math.min(maxLength, length() - offset); + checkOffsetToRead(offset, length(), len); + if (len == 0) { + return 0; + } + System.arraycopy(array, Math.toIntExact(arrayOffset + offset), dst, dstOffset, Math.toIntExact(len)); + return len; + } + + /** + * {@inheritDoc} + */ + @Override + public long getBytes(final long offset, @NonNull final ByteBuffer dst) { + if (!dst.hasArray()) { + return super.getBytes(offset, dst); + } + final long len = Math.min(length() - offset, dst.remaining()); + checkOffsetToRead(offset, length(), len); + final byte[] dstArr = dst.array(); + final int dstPos = dst.position(); + final int dstArrOffset = dst.arrayOffset(); + System.arraycopy( + array, Math.toIntExact(arrayOffset + offset), dstArr, dstArrOffset + dstPos, Math.toIntExact(len)); + return len; + } + + /** + * {@inheritDoc} + */ + @NonNull + @Override + public Bytes getBytes(final long offset, final long len) { + validateLen(len); + if (len == 0) { + return Bytes.EMPTY; + } + checkOffsetToRead(offset, length(), len); + final byte[] res = new byte[Math.toIntExact(len)]; + System.arraycopy(array, Math.toIntExact(arrayOffset + offset), res, 0, res.length); + return Bytes.wrap(res); + } + + /** + * {@inheritDoc} + */ + @Override + public int getVarInt(final long offset, final boolean zigZag) { + return (int) getVar(Math.toIntExact(offset), zigZag); + } + + /** + * {@inheritDoc} + */ + @Override + public long getVarLong(final long offset, final boolean zigZag) { + return getVar(Math.toIntExact(offset), zigZag); + } + + private long getVar(final int offset, final boolean zigZag) { + checkOffset(offset, buffer.limit()); + + final int readOffset = arrayOffset + offset; + int rem = buffer.limit() - offset; + if (rem > 10) { + rem = 10; + } + + long value = 0; + + int i = 0; + while (i != rem) { + final byte b = UnsafeUtils.getArrayByteNoChecks(array, readOffset + i); + value |= (long) (b & 0x7F) << (i * 7); + i++; + if (b >= 0) { + return zigZag ? 
(value >>> 1) ^ -(value & 1) : value; + } + } + throw (i == 10) ? new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + } + + /** + * {@inheritDoc} + */ + @Override + public byte readByte() { + if (remaining() == 0) { + throw new BufferUnderflowException(); + } + final int pos = buffer.position(); + final byte res = UNSAFE.getByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos); + buffer.position(pos + 1); + return res; + } + + /** + * {@inheritDoc} + */ + @Override + public long readBytes(@NonNull byte[] dst, int offset, int maxLength) { + validateLen(maxLength); + final var len = Math.toIntExact(Math.min(maxLength, remaining())); + if (len == 0) { + return 0; + } + final int pos = buffer.position(); + System.arraycopy(array, arrayOffset + pos, dst, offset, len); + buffer.position(pos + len); + return len; + } + + /** + * {@inheritDoc} + */ + @Override + public long readBytes(@NonNull final ByteBuffer dst) { + if (!dst.hasArray()) { + return super.readBytes(dst); + } + final long len = Math.min(remaining(), dst.remaining()); + final int pos = buffer.position(); + final byte[] dstArr = dst.array(); + final int dstPos = dst.position(); + final int dstArrOffset = dst.arrayOffset(); + System.arraycopy(array, arrayOffset + pos, dstArr, dstArrOffset + dstPos, Math.toIntExact(len)); + buffer.position(Math.toIntExact(pos + len)); + dst.position(Math.toIntExact(dstPos + len)); + return len; + } + + /** + * {@inheritDoc} + */ + @NonNull + @Override + public Bytes readBytes(final int len) { + validateLen(len); + final int pos = buffer.position(); + validateCanRead(pos, len); + if (len == 0) { + return Bytes.EMPTY; + } + final byte[] res = new byte[len]; + System.arraycopy(array, arrayOffset + pos, res, 0, len); + buffer.position(pos + len); + return Bytes.wrap(res); + } + + /** + * {@inheritDoc} + */ + @Override + public int readVarInt(final boolean zigZag) { + return (int) readVar(zigZag); + } + + /** + * {@inheritDoc} + */ + @Override + public long readVarLong(final boolean zigZag) { + return readVar(zigZag); + } + + private long readVar(final boolean zigZag) { + final int pos = buffer.position(); + final int offset = arrayOffset + pos; + int rem = buffer.remaining(); + if (rem > 10) { + rem = 10; + } + + long value = 0; + + int i = 0; + while (i != rem) { + final byte b = UnsafeUtils.getArrayByteNoChecks(array, offset + i); + value |= (long) (b & 0x7F) << (i * 7); + i++; + if (b >= 0) { + buffer.position(pos + i); + return zigZag ? (value >>> 1) ^ -(value & 1) : value; + } + } + throw (i == 10) ? 
new DataEncodingException("Malformed var int") : new BufferUnderflowException(); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeByte(final byte b) { + validateCanWrite(1); + final int pos = buffer.position(); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos, b); + buffer.position(pos + 1); + } + + @Override + public void writeByte2(final byte b1, final byte b2) { + validateCanWrite(2); + int pos = buffer.position(); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b1); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b2); + buffer.position(pos); + } + + @Override + public void writeByte3(final byte b1, final byte b2, final byte b3) { + validateCanWrite(3); + int pos = buffer.position(); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b1); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b2); + UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b3); + buffer.position(pos); + } +// +// @Override +// public void writeByte4(final byte b1, final byte b2, final byte b3, final byte b4) { +// validateCanWrite(4); +// int pos = buffer.position(); +// UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b1); +// UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b2); +// UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b3); +// UNSAFE.putByte(array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos++, b4); +// buffer.position(pos); +// } + // TODO not interesting at least on mac this is just as fast as the above + @Override + public void writeByte4(final byte b1, final byte b2, final byte b3, final byte b4) { + buffer.put(new byte[] {b1, b2, b3, b4}); + } + + /** {@inheritDoc} */ + @Override + public void writeBytes(@NonNull final byte[] src, final int offset, final int len) { + validateLen(len); + validateCanWrite(len); + if (src.length < offset + len) { + throw new IndexOutOfBoundsException("Source array is too short for the specified offset and length"); + } + final int pos = buffer.position(); + UNSAFE.copyMemory(src, BYTE_ARRAY_BASE_OFFSET + offset, array, BYTE_ARRAY_BASE_OFFSET + arrayOffset + pos, len); + buffer.position(pos + len); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeBytes(@NonNull final ByteBuffer src) { + if (!src.hasArray()) { + super.writeBytes(src); + return; + } + final long len = src.remaining(); + validateCanWrite(len); + final int pos = buffer.position(); + final byte[] srcArr = src.array(); + final int srcArrOffset = src.arrayOffset(); + final int srcPos = src.position(); + System.arraycopy(srcArr, srcArrOffset + srcPos, array, arrayOffset + pos, Math.toIntExact(len)); + src.position(Math.toIntExact(srcPos + len)); + buffer.position(Math.toIntExact(pos + len)); + } + + /** + * {@inheritDoc} + */ + @Override + public int writeBytes(@NonNull final InputStream src, final int maxLength) { + // Check for a bad length or a null src + Objects.requireNonNull(src); + validateLen(maxLength); + + // If the length is zero, then we have nothing to read + if (maxLength == 0) { + return 0; + } + + // We are going to read from the input stream up to either "len" or the number of bytes + // remaining in this buffer, whichever is lesser. 
+ final long numBytesToRead = Math.min(maxLength, remaining()); + if (numBytesToRead == 0) { + return 0; + } + + try { + int pos = buffer.position(); + int totalBytesRead = 0; + while (totalBytesRead < numBytesToRead) { + int bytesRead = src.read(array, pos + arrayOffset, (int) numBytesToRead - totalBytesRead); + if (bytesRead == -1) { + buffer.position(pos); + return totalBytesRead; + } + pos += bytesRead; + totalBytesRead += bytesRead; + } + buffer.position(pos); + return totalBytesRead; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public void writeJsonString(@NonNull String value, boolean quoted) { + int offset = buffer.position(); + final int len = value.length(); + validateCanWrite(len * 6L + 2); // TODO Worst-case scenario for UTF-8 encoding, is there better estimate? + if (quoted) array[arrayOffset + offset++] = '"'; + for (int i = 0; i < len; i++) { + char c = value.charAt(i); + + // Escape control chars and JSON specials + switch (c) { + case '"': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = '"'; continue; + case '\\': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = '\\'; continue; + case '\b': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'b'; continue; + case '\f': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'f'; continue; + case '\n': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'n'; continue; + case '\r': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 'r'; continue; + case '\t': array[arrayOffset + offset++] = '\\'; array[arrayOffset + offset++] = 't'; continue; + } + + if (c < 0x20) { + // Control character – use \ u00XX + array[arrayOffset + offset++] = '\\'; + array[arrayOffset + offset++] = 'u'; + array[arrayOffset + offset++] = '0'; + array[arrayOffset + offset++] = '0'; + array[arrayOffset + offset++] = HEX[c >> 4]; + array[arrayOffset + offset++] = HEX[c & 0xF]; + } else { + // Proper UTF-8 encoding for extended characters + offset = encodeUtf8(c, value, i, offset); + } + } + if (quoted) array[arrayOffset + offset++] = '"'; + buffer.position(offset); + } + + private int encodeUtf8(char c, String value, int i, int offset) { + if (c < 0x80) { + array[arrayOffset + offset++] = (byte) c; + } else if (c < 0x800) { + array[arrayOffset + offset++] = (byte) (0b11000000 | (c >> 6)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } else if (Character.isSurrogate(c)) { + int cp = Character.toCodePoint(c, value.charAt(++i)); + array[arrayOffset + offset++] = (byte) (0b11110000 | (cp >> 18)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((cp >> 12) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((cp >> 6) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (cp & 0b00111111)); + } else { + array[arrayOffset + offset++] = (byte) (0b11100000 | (c >> 12)); + array[arrayOffset + offset++] = (byte) (0b10000000 | ((c >> 6) & 0b00111111)); + array[arrayOffset + offset++] = (byte) (0b10000000 | (c & 0b00111111)); + } + return offset; + } + + @Override + public void writeJsonLong(final long value, boolean quoted) { + int offset = buffer.position(); + validateCanWrite(20); // Worst-case scenario for a long value, quoted or not + final int baseOffset = BYTE_ARRAY_BASE_OFFSET + arrayOffset; + if (quoted) UNSAFE.putByte(array, baseOffset + offset++, QUOTE); + // Handle zero explicitly + if (value == 0) { + UNSAFE.putByte(array, 
baseOffset + offset++, _0); + } else if (value == Long.MIN_VALUE) { + // Special case for Long.MIN_VALUE(-9223372036854775808) to avoid overflow + UNSAFE.copyMemory(MIN_LONG_VALUE, BYTE_ARRAY_BASE_OFFSET, array, baseOffset + offset, MIN_LONG_VALUE.length); + offset += MIN_LONG_VALUE.length; + } else { + long v = value; + if (v < 0) { + UNSAFE.putByte(array, baseOffset + offset++, MINUS); + v = -v; + } + // count the number of digits in the long value, assumes all values are positive + // Fast digit count calculation + final int digitCount = (v < 10L) ? 1 : + (v < 100L) ? 2 : + (v < 1000L) ? 3 : + (v < 10000L) ? 4 : + (v < 100000L) ? 5 : + (v < 1000000L) ? 6 : + (v < 10000000L) ? 7 : + (v < 100000000L) ? 8 : + (v < 1000000000L) ? 9 : + (v < 10000000000L) ? 10 : + (v < 100000000000L) ? 11 : + (v < 1000000000000L) ? 12 : + (v < 10000000000000L) ? 13 : + (v < 100000000000000L) ? 14 : + (v < 1000000000000000L) ? 15 : + (v < 10000000000000000L) ? 16 : + (v < 100000000000000000L) ? 17 : + (v < 1000000000000000000L) ? 18 : 19; + // Now write them in reverse order + long tmp = v; + for (int i = digitCount-1; i >= 0; i--) { + UNSAFE.putByte(array, baseOffset + offset + i, (byte) ('0' + (tmp % 10))); + tmp /= 10; + } + offset += digitCount; + } + if (quoted) UNSAFE.putByte(array, baseOffset + offset++, QUOTE); + buffer.position(offset); + } + + @Override + public void writeBase64(@NonNull Bytes dataToEncode) { + final int offset = buffer.position(); + final int writenBytes = dataToEncode.writeBase64To(array, offset); + buffer.position(offset + writenBytes); + } +} From 51163954ed5101dcacae61a3ee685c85f3eaa1e5 Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Wed, 28 May 2025 16:15:15 -0700 Subject: [PATCH 4/5] Added unsafe Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../com/hedera/pbj/runtime/io/buffer/BufferedData.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/BufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/BufferedData.java index 43929d5b..b7d0fc2d 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/BufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/BufferedData.java @@ -27,7 +27,7 @@ */ public sealed class BufferedData implements BufferedSequentialData, ReadableSequentialData, WritableSequentialData, RandomAccessData - permits ByteArrayBufferedData, DirectBufferedData { + permits ByteArrayBufferedData, DirectBufferedData, UnsafeByteArrayBufferedData { /** Single instance of an empty buffer we can use anywhere we need an empty read only buffer */ @SuppressWarnings("unused") @@ -68,7 +68,7 @@ protected BufferedData(@NonNull final ByteBuffer buffer) { @NonNull public static BufferedData wrap(@NonNull final ByteBuffer buffer) { if (buffer.hasArray()) { - return new ByteArrayBufferedData(buffer); + return new UnsafeByteArrayBufferedData(buffer); } else if (buffer.isDirect()) { return new DirectBufferedData(buffer); } else { @@ -88,7 +88,7 @@ public static BufferedData wrap(@NonNull final ByteBuffer buffer) { */ @NonNull public static BufferedData wrap(@NonNull final byte[] array) { - return new ByteArrayBufferedData(ByteBuffer.wrap(array)); + return new UnsafeByteArrayBufferedData(ByteBuffer.wrap(array)); } /** @@ -104,7 +104,7 @@ public static BufferedData wrap(@NonNull final byte[] array) { */ @NonNull public 
static BufferedData wrap(@NonNull final byte[] array, final int offset, final int len) { - return new ByteArrayBufferedData(ByteBuffer.wrap(array, offset, len)); + return new UnsafeByteArrayBufferedData(ByteBuffer.wrap(array, offset, len)); } /** @@ -115,7 +115,7 @@ public static BufferedData wrap(@NonNull final byte[] array, final int offset, f */ @NonNull public static BufferedData allocate(final int size) { - return new ByteArrayBufferedData(ByteBuffer.allocate(size)); + return new UnsafeByteArrayBufferedData(ByteBuffer.allocate(size)); } /** From e98f096fdf7d0e3638cb4e4706e534b4c3c956b8 Mon Sep 17 00:00:00 2001 From: Josh Marinacci <276938+joshmarinacci@users.noreply.github.com> Date: Tue, 2 Sep 2025 09:42:10 -0700 Subject: [PATCH 5/5] more JSON tests (#583) Signed-off-by: Josh Marinacci --- .../com/hedera/pbj/grpc/helidon/PbjTest.java | 1 + .../buffer/UnsafeByteArrayBufferedData.java | 3 + .../hedera/pbj/runtime/json/JsonLexer.java | 4 +- .../pbj/runtime/json/JsonLexerTest.java | 57 ++++++++++++++++++- 4 files changed, 61 insertions(+), 4 deletions(-) diff --git a/pbj-core/pbj-grpc-helidon/src/test/java/com/hedera/pbj/grpc/helidon/PbjTest.java b/pbj-core/pbj-grpc-helidon/src/test/java/com/hedera/pbj/grpc/helidon/PbjTest.java index 9dd48df6..08200a75 100644 --- a/pbj-core/pbj-grpc-helidon/src/test/java/com/hedera/pbj/grpc/helidon/PbjTest.java +++ b/pbj-core/pbj-grpc-helidon/src/test/java/com/hedera/pbj/grpc/helidon/PbjTest.java @@ -56,6 +56,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +@SuppressWarnings("dangling-doc-comments") class PbjTest { private static final MediaType APPLICATION_GRPC_PROTO = HttpMediaType.create("application/grpc+proto"); private static final MediaType APPLICATION_GRPC_JSON = HttpMediaType.create("application/grpc+json"); diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java index 20fb3a7b..78509ec0 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/UnsafeByteArrayBufferedData.java @@ -347,6 +347,9 @@ public void writeByte4(final byte b1, final byte b2, final byte b3, final byte b public void writeBytes(@NonNull final byte[] src, final int offset, final int len) { validateLen(len); validateCanWrite(len); + if (offset < 0) { + throw new IndexOutOfBoundsException("Negative offset"); + } if (src.length < offset + len) { throw new IndexOutOfBoundsException("Source array is too short for the specified offset and length"); } diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java index 68ca4f24..7ea84eb1 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/json/JsonLexer.java @@ -68,7 +68,9 @@ * google.protobuf.Value for details. 
* NullValuenullJSON null * Emptyobject{}An empty JSON object - * + * + * Table from Protobuf Docs of JSON Mapping of PB types + * */ public final class JsonLexer { diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java index 0bdf12ce..1c2f7923 100644 --- a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/json/JsonLexerTest.java @@ -9,9 +9,9 @@ import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.util.Base64; + import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; public class JsonLexerTest { @@ -71,7 +71,8 @@ void booleanTest() throws Exception { String json = """ { "trueValue": true, - "falseValue": false + "falseValue": false, + "trueValue2": "true" } """; JsonLexer lexer = new JsonLexer(readableSequentialData(json)); @@ -89,6 +90,13 @@ void booleanTest() throws Exception { lexer.consumeColon(); boolean falseValue = lexer.readBoolean(); assertFalse(falseValue); + lexer.consumeComma(); + + String fieldName3 = lexer.readString(); + assertEquals("trueValue2", fieldName3); + lexer.consumeColon(); + boolean trueValue2 = lexer.readBoolean(); + assertTrue(trueValue2); lexer.closeObject(); } @@ -190,7 +198,8 @@ void enumTest() throws Exception { String json = """ { "enumByName": "SECOND", - "enumByOrdinal": 2 + "enumByOrdinal": 2, + "enumByZero": 0 } """; @@ -209,6 +218,13 @@ void enumTest() throws Exception { lexer.consumeColon(); TestEnum enumValue2 = lexer.readEnum(TestEnum.class); assertEquals(TestEnum.THIRD, enumValue2); + lexer.consumeComma(); + + String fieldName3 = lexer.readString(); + assertEquals("enumByZero", fieldName3); + lexer.consumeColon(); + TestEnum enumValue3 = lexer.readEnum(TestEnum.class); + assertEquals(TestEnum.FIRST,enumValue3); lexer.closeObject(); } @@ -349,6 +365,41 @@ void parseExceptionTest() { assertTrue(exception.getMessage().contains("Expected")); } + @Test + void parseExceptionReadBoolean() throws ParseException { + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("bad")).readBoolean()); + } + + @Test + void parseExceptionReadBytes() throws ParseException { + // missing the starting quote + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("somebaddata")).readBytes()); + } + + @Test + void parseExceptionNextFieldOrClose() throws ParseException { + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData(".")).nextFieldOrClose()); + } + @Test + void consumeWrongCharacters() throws ParseException { + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData(".")).consumeComma()); + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData(".")).consumeColon()); + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("[")).openObject()); + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("]")).closeObject()); + assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("{")).openArray()); + } + + + @Test + void parseExceptionEnumErrors() throws ParseException { + // check for non digit + assertThrows(ParseException.class, () -> new 
JsonLexer(readableSequentialData("x,")).readEnum(TestEnum.class));
+        // 4 is a digit but TestEnum only has three ordinals
+        assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("4,")).readEnum(TestEnum.class));
+        // FOURTH is not a valid value of TestEnum
+        assertThrows(ParseException.class, () -> new JsonLexer(readableSequentialData("\"FOURTH\",")).readEnum(TestEnum.class));
+    }
+
     private ReadableSequentialData readableSequentialData(String json) {
         return BufferedData.wrap(json.getBytes(StandardCharsets.UTF_8));
     }
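The tests above also double as a compact reference for driving the new JsonLexer by hand: open the object, read a field name, consume the colon, read the typed value, and consume a comma between fields. The following standalone sketch mirrors booleanTest and assumes only the methods exercised in JsonLexerTest (openObject, readString, consumeColon, readBoolean, consumeComma, closeObject); the JSON payload, field names, and class name are illustrative and not part of the patch.

import com.hedera.pbj.runtime.io.buffer.BufferedData;
import com.hedera.pbj.runtime.json.JsonLexer;
import java.nio.charset.StandardCharsets;

/** Illustrative only: walks two boolean fields the same way booleanTest does. */
public final class JsonLexerUsageSketch {
    public static void main(String[] args) throws Exception {
        final String json = "{ \"enabled\": true, \"archived\": false }";
        final JsonLexer lexer =
                new JsonLexer(BufferedData.wrap(json.getBytes(StandardCharsets.UTF_8)));
        lexer.openObject();
        final String firstField = lexer.readString();  // "enabled"
        lexer.consumeColon();
        final boolean enabled = lexer.readBoolean();   // true
        lexer.consumeComma();
        final String secondField = lexer.readString(); // "archived"
        lexer.consumeColon();
        final boolean archived = lexer.readBoolean();  // false
        lexer.closeObject();
        System.out.println(firstField + "=" + enabled + ", " + secondField + "=" + archived);
    }
}

Error handling follows the tests above: any structural mismatch in the token stream surfaces as a ParseException rather than a partially populated result.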
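On the write side, writeJsonLong in UnsafeByteArrayBufferedData (earlier in this series) avoids Long.toString allocation by counting the digits first and then writing them into the destination from the least significant position backwards. The sketch below shows the same idea against a plain byte[], with the Unsafe plumbing, quoting, and bounds checks from the patch omitted; the class and method names are made up for illustration.

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/** Illustrative only: count-then-write-backwards long encoding, minus Unsafe. */
public final class LongAsciiSketch {
    static int writeLongAscii(final byte[] out, int offset, final long value) {
        if (value == 0) {
            out[offset++] = '0';
            return offset;
        }
        if (value == Long.MIN_VALUE) {
            // -Long.MIN_VALUE overflows, so copy the literal digits instead
            final byte[] min = "-9223372036854775808".getBytes(StandardCharsets.US_ASCII);
            System.arraycopy(min, 0, out, offset, min.length);
            return offset + min.length;
        }
        long v = value;
        if (v < 0) {
            out[offset++] = '-';
            v = -v;
        }
        // count digits, then fill the slots from the least significant digit backwards
        int digits = 1;
        for (long t = v; t >= 10; t /= 10) {
            digits++;
        }
        for (int i = digits - 1; i >= 0; i--) {
            out[offset + i] = (byte) ('0' + (v % 10));
            v /= 10;
        }
        return offset + digits;
    }

    public static void main(String[] args) {
        final byte[] buf = new byte[20];
        final int end = writeLongAscii(buf, 0, -987654321L);
        // prints -987654321; the digit bytes were produced without an intermediate String
        System.out.println(new String(Arrays.copyOf(buf, end), StandardCharsets.US_ASCII));
    }
}

Counting digits up front is what lets the loop write each byte exactly once, with no reverse pass and no temporary buffer.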