Skip to content

Commit

Permalink
Add an optional extended parser subclass (`YAMLAnchorReplayingFactory…
Browse files Browse the repository at this point in the history
…`) able to inline anchors (#502)
  • Loading branch information
HeikoBoettger-KarlStorz authored Dec 21, 2024
1 parent 2c26e1e commit 95a4300
Show file tree
Hide file tree
Showing 6 changed files with 647 additions and 3 deletions.
3 changes: 3 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ Heiko Boettger (@HeikoBoettger)

* Contributed #482: (yaml) Allow passing `ParserImpl` by a subclass or overwrite the events
(2.18.0)
* Contributed #502: (yaml) Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
able to inline anchors
(2.19.0)

Burdyug Pavel (@Pavel38l)

Expand Down
4 changes: 3 additions & 1 deletion release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ Active Maintainers:

2.19.0 (not yet released)

-
#502: Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
able to inline anchors
(contributed by Heiko B)

2.18.2 (27-Nov-2024)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.CharArrayReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.Reader;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.io.IOContext;

/**
 * {@link YAMLFactory} variant that exists solely to hand out
 * {@link YAMLAnchorReplayingParser} instances wherever the base factory
 * would create a plain {@link YAMLParser}.
 *
 * @since 2.19
 */
public class YAMLAnchorReplayingFactory extends YAMLFactory {
    private static final long serialVersionUID = 1L;

    /** Creates a factory using the base-class defaults. */
    public YAMLAnchorReplayingFactory() {
        super();
    }

    /**
     * Creates a factory that uses the given codec.
     *
     * @param oc codec handed to the base factory
     */
    public YAMLAnchorReplayingFactory(ObjectCodec oc) {
        super(oc);
    }

    /**
     * Copy-style constructor; both arguments are passed straight to the
     * matching {@link YAMLFactory} constructor.
     *
     * @param src factory to take settings from
     * @param oc codec for the new factory
     */
    public YAMLAnchorReplayingFactory(YAMLFactory src, ObjectCodec oc) {
        super(src, oc);
    }

    /**
     * Builder-based constructor.
     *
     * @param b builder handed to the base factory
     */
    protected YAMLAnchorReplayingFactory(YAMLFactoryBuilder b) {
        super(b);
    }

    /**
     * @return a new factory of this type constructed from this instance, with
     *     a {@code null} codec
     */
    @Override
    public YAMLAnchorReplayingFactory copy() {
        _checkInvalidCopy(YAMLAnchorReplayingFactory.class);
        final ObjectCodec noCodec = null;
        return new YAMLAnchorReplayingFactory(this, noCodec);
    }

    /**
     * @return a new factory of this type constructed from this instance and
     *     its current codec
     */
    @Override
    protected Object readResolve() {
        final ObjectCodec codec = _objectCodec;
        return new YAMLAnchorReplayingFactory(this, codec);
    }

    @Override
    protected YAMLParser _createParser(InputStream input, IOContext ctxt) throws IOException {
        final Reader source = _createReader(input, (JsonEncoding) null, ctxt);
        return _replayingParser(ctxt, source);
    }

    @Override
    protected YAMLParser _createParser(Reader r, IOContext ctxt) throws IOException {
        return _replayingParser(ctxt, r);
    }

    @Override
    protected YAMLParser _createParser(char[] data, int offset, int len, IOContext ctxt, boolean recyclable) throws IOException {
        final Reader source = new CharArrayReader(data, offset, len);
        return _replayingParser(ctxt, source);
    }

    @Override
    protected YAMLParser _createParser(byte[] data, int offset, int len, IOContext ctxt) throws IOException {
        final Reader source = _createReader(data, offset, len, (JsonEncoding) null, ctxt);
        return _replayingParser(ctxt, source);
    }

    /**
     * Single construction point shared by all {@code _createParser} overloads:
     * builds the anchor-replaying parser from this factory's current feature
     * flags, loader options and codec.
     *
     * @param ctxt I/O context for the new parser
     * @param source reader the parser consumes
     * @return the newly constructed parser
     */
    private YAMLParser _replayingParser(IOContext ctxt, Reader source) {
        return new YAMLAnchorReplayingParser(ctxt, _parserFeatures, _yamlParserFeatures,
                _loaderOptions, _objectCodec, source);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
package com.fasterxml.jackson.dataformat.yaml;

import java.io.Reader;
import java.io.IOException;

import java.util.*;

import org.yaml.snakeyaml.LoaderOptions;
import org.yaml.snakeyaml.events.*;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.ObjectCodec;
import com.fasterxml.jackson.core.exc.StreamConstraintsException;
import com.fasterxml.jackson.core.io.IOContext;

/**
 * A parser that remembers the events of anchored parts in YAML and repeats them
 * to inline these parts when an alias is found, instead of only returning an alias.
 * Merge keys ({@code <<}) whose value is a mapping are flattened into the
 * enclosing mapping by dropping the key and the mapping's start/end events.
 *<p>
 * Note: this overrides {@link #getEvent()} since the base {@code nextToken()} manages too much
 * state and it seems to be much simpler to re-emit the events.
 *
 * @since 2.19
 */
public class YAMLAnchorReplayingParser extends YAMLParser
{
    /**
     * Events collected so far for one anchored node that is still being read.
     */
    private static class AnchorContext {
        public final String anchor;
        public final List<Event> events = new ArrayList<>();
        // Collection nesting relative to the anchored node; the context is
        // complete once this drops back to 0.
        public int depth = 1;

        public AnchorContext(String anchor) {
            this.anchor = anchor;
        }
    }

    /**
     * the maximum number of events that can be replayed
     */
    public static final int MAX_EVENTS = 9999;

    /**
     * the maximum limit of anchors to remember
     */
    public static final int MAX_ANCHORS = 9999;

    /**
     * the maximum limit of merges to follow
     */
    public static final int MAX_MERGES = 9999;

    /**
     * the maximum limit of references to remember
     */
    public static final int MAX_REFS = 9999;

    /**
     * Remembers the depth at which each active merge mapping was opened, in order
     * to recognise and drop the corresponding mapping end
     */
    private final ArrayDeque<Integer> mergeStack = new ArrayDeque<>();

    /**
     * Collects nested anchor definitions
     */
    private final ArrayDeque<AnchorContext> tokenStack = new ArrayDeque<>();

    /**
     * Keeps track of the last sequentially found definition of each anchor
     */
    private final Map<String, List<Event>> referencedObjects = new HashMap<>();

    /**
     * Keeps track of events that have been inserted when processing an alias
     */
    private final ArrayDeque<Event> refEvents = new ArrayDeque<>();

    /**
     * keeps track of the global depth of nested collections
     */
    private int globalDepth = 0;

    /**
     * Whether the event most recently produced by {@link #pollEvent()} came from
     * the replay queue rather than from the underlying parser
     */
    private boolean lastEventReplayed = false;

    public YAMLAnchorReplayingParser(IOContext ctxt, int parserFeatures, int formatFeatures, LoaderOptions loaderOptions, ObjectCodec codec, Reader reader) {
        super(ctxt, parserFeatures, formatFeatures, loaderOptions, codec, reader);
    }

    /**
     * Stores a completed anchor definition and, if another anchor is still being
     * collected, appends the completed events to that enclosing definition.
     *
     * @param context the completed anchor definition
     * @throws StreamConstraintsException if {@link #MAX_REFS} or {@link #MAX_EVENTS}
     *     would be exceeded
     */
    private void finishContext(AnchorContext context) throws StreamConstraintsException {
        // Redefining a known anchor does not grow the map, so only new names count.
        if (!referencedObjects.containsKey(context.anchor) && referencedObjects.size() >= MAX_REFS) {
            throw new StreamConstraintsException("too many references in the document");
        }
        referencedObjects.put(context.anchor, context.events);
        if (!tokenStack.isEmpty()) {
            List<Event> events = tokenStack.peek().events;
            if (events.size() + context.events.size() > MAX_EVENTS) {
                throw new StreamConstraintsException("too many events to replay");
            }
            events.addAll(context.events);
        }
    }

    /**
     * Adjusts {@link #globalDepth} for collection start/end events.
     *
     * @param event event to inspect; may be {@code null}
     * @return the same event
     */
    protected Event trackDepth(Event event) {
        if (event instanceof CollectionStartEvent) {
            ++globalDepth;
        } else if (event instanceof CollectionEndEvent) {
            --globalDepth;
        }
        return event;
    }

    /**
     * Drops the mapping end that closes the innermost active merge mapping.
     * Must be called after {@link #trackDepth(Event)} for the same event.
     *
     * @param event event to inspect
     * @return the event, or {@code null} if it has to be skipped
     */
    protected Event filterEvent(Event event) {
        if (event instanceof MappingEndEvent) {
            if (!mergeStack.isEmpty()) {
                if (mergeStack.peek() > globalDepth) {
                    mergeStack.pop();
                    return null;
                }
            }
        }
        return event;
    }

    /**
     * Returns the next event with aliases inlined and merge keys flattened,
     * recording it for every anchor definition that is currently open.
     *
     * @return the next event, or {@code null} when the underlying parser has none left
     * @throws IOException on an unknown alias, a merge key whose value is not a
     *     mapping, or when one of the {@code MAX_*} limits is exceeded
     */
    @Override
    protected Event getEvent() throws IOException {
        while (true) {
            final Event event = nextExpandedEvent();
            if (event == null) {
                return null;
            }
            // Anchor registration and merge detection only apply to events read
            // from the document; replayed events were already processed once.
            if (!lastEventReplayed) {
                if (event instanceof NodeEvent) {
                    final String anchor = ((NodeEvent) event).getAnchor();
                    if (anchor != null) {
                        startAnchor(anchor, event);
                        return event;
                    }
                }
                if (isMergeKey(event)) {
                    startMerge();
                    continue;
                }
            }
            recordEvent(event);
            return event;
        }
    }

    /**
     * Fetches the next event from the replay queue or, if that is empty, from the
     * underlying parser, applying depth tracking and merge-end filtering to both
     * sources alike.
     */
    private Event pollEvent() throws IOException {
        while (true) {
            final boolean replayed = !refEvents.isEmpty();
            Event event = replayed ? refEvents.removeFirst() : super.getEvent();
            if (event == null) {
                return null;
            }
            event = filterEvent(trackDepth(event));
            if (event != null) {
                lastEventReplayed = replayed;
                return event;
            }
        }
    }

    /**
     * Like {@link #pollEvent()}, but replaces every alias by the events stored
     * for its anchor. The first replayed event is polled like any other, so the
     * depth stays balanced with the matching end event.
     */
    private Event nextExpandedEvent() throws IOException {
        while (true) {
            final Event event = pollEvent();
            if (!(event instanceof AliasEvent)) {
                return event;
            }
            final AliasEvent alias = (AliasEvent) event;
            final List<Event> events = referencedObjects.get(alias.getAnchor());
            if (events == null) {
                throw new JsonParseException("invalid alias " + alias.getAnchor());
            }
            if (refEvents.size() + events.size() > MAX_EVENTS) {
                throw new StreamConstraintsException("too many events to replay");
            }
            refEvents.addAll(events);
        }
    }

    /**
     * Opens a new anchor definition for an anchored event read from the document.
     * Collections stay open until their end event; other nodes are stored at once.
     */
    private void startAnchor(String anchor, Event event) throws StreamConstraintsException {
        final AnchorContext context = new AnchorContext(anchor);
        context.events.add(event);
        if (event instanceof CollectionStartEvent) {
            if (tokenStack.size() + 1 > MAX_ANCHORS) {
                throw new StreamConstraintsException("too many anchors in the document");
            }
            tokenStack.push(context);
        } else {
            // directly store it
            finishContext(context);
        }
    }

    /**
     * Tells whether the event is a scalar with the value {@code <<}.
     * NOTE(review): scalar style and key/value position are not checked, so a
     * quoted {@code "<<"} or a {@code <<} used as a value is treated as a merge
     * key as well.
     */
    private static boolean isMergeKey(Event event) {
        return (event instanceof ScalarEvent) && "<<".equals(((ScalarEvent) event).getValue());
    }

    /**
     * Consumes the value of a merge key, which must start a mapping, and
     * remembers its depth so that {@link #filterEvent(Event)} can drop the
     * matching end. The mapping start itself is dropped and never recorded.
     * NOTE(review): an anchor placed directly on the merge value
     * ({@code <<: &a {...}}) is not registered.
     */
    private void startMerge() throws IOException {
        // expect next node to be a map
        final Event next = nextExpandedEvent();
        if (!(next instanceof MappingStartEvent)) {
            throw new JsonParseException("found field '<<' but value isn't a map");
        }
        if (mergeStack.size() + 1 > MAX_MERGES) {
            throw new StreamConstraintsException("too many merges in the document");
        }
        mergeStack.push(globalDepth);
    }

    /**
     * Appends an emitted event to the innermost open anchor definition, if any,
     * and closes that definition when its collection ends.
     */
    private void recordEvent(Event event) throws StreamConstraintsException {
        if (tokenStack.isEmpty()) {
            return;
        }
        final AnchorContext context = tokenStack.peek();
        if (context.events.size() + 1 > MAX_EVENTS) {
            throw new StreamConstraintsException("too many events to replay");
        }
        context.events.add(event);
        if (event instanceof CollectionStartEvent) {
            ++context.depth;
        } else if (event instanceof CollectionEndEvent) {
            --context.depth;
            if (context.depth == 0) {
                tokenStack.pop();
                finishContext(context);
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -580,13 +580,16 @@ public JsonToken nextToken() throws IOException
/**
* Since the parserImpl cannot be replaced allow subclasses to at least be able to
* influence the events being consumed.
*
*<p>
* A particular use case is working around the lack of anchor and alias support to
* emit additional events.
*<p>
* NOTE: since 2.19, declared to throw {@link IOException} to allow sub-classes
* to do so.
*
* @since 2.18
*/
protected Event getEvent() {
protected Event getEvent() throws IOException {
return _yamlParser.getEvent();
}

Expand Down
Loading

0 comments on commit 95a4300

Please sign in to comment.