Remove duplicate Symbols caused by a Label

mpsijm · mpsijm · commit 877f2f75e82c · 2019-11-04T16:59:13.000+01:00
**The Problem:** In `NormGrammarReader.processSymbol`, a cache is maintained for symbols that are already processed. However, this cache did not take into account that Symbols with and without a Label should be treated as equal (e.g. `tail:Stm` and `Stm`, see org.spoofax.jsglr2.integration/src/main/resources/grammars/layout-sensitive.sdf3). These labels are only used to reference the symbols from the layout constraints, but do not actually contribute to the meaning of a production. **Why is this a problem?** For SLR parse table generation (see metaborg#27), all grammar symbols are assumed to be unique. However, because Symbols with or without a Label were not treated as equal, the "graph" used to calculate the First- and Follow-sets became disconnected. This in turn caused some symbols to have empty First-/Follow-sets. The productions belonging to these symbols would not get any reduce actions in the parse table, because if the Follow-set is empty, no lookahead is allowed. **Current solution:** When processing symbols in the `NormGrammarReader`, any Label constructor is removed from the symbol term. **Discussion:** I have tried looking around where these labels are used (I am assuming layout constraints, but I couldn't find how they are processed in NormGrammarReader). Depending on their usage, it might be a good idea to apply this filter in Stratego already, e.g. in the strategy `module-to-normal-form`. @udesou any feedback is appreciated, as always 🙂
diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/io/NormGrammarReader.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/io/NormGrammarReader.java
@@ -3,50 +3,14 @@
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 
 import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
 import org.metaborg.parsetable.characterclasses.ICharacterClass;
 import org.metaborg.sdf2table.exceptions.ModuleNotFoundException;
 import org.metaborg.sdf2table.exceptions.UnexpectedTermException;
-import org.metaborg.sdf2table.grammar.AltSymbol;
-import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
-import org.metaborg.sdf2table.grammar.ConstructorAttribute;
-import org.metaborg.sdf2table.grammar.ContextFreeSymbol;
-import org.metaborg.sdf2table.grammar.DeprecatedAttribute;
-import org.metaborg.sdf2table.grammar.FileStartSymbol;
-import org.metaborg.sdf2table.grammar.GeneralAttribute;
-import org.metaborg.sdf2table.grammar.IAttribute;
-import org.metaborg.sdf2table.grammar.ISymbol;
-import org.metaborg.sdf2table.grammar.IterSepSymbol;
-import org.metaborg.sdf2table.grammar.IterStarSepSymbol;
-import org.metaborg.sdf2table.grammar.IterStarSymbol;
-import org.metaborg.sdf2table.grammar.IterSymbol;
-import org.metaborg.sdf2table.grammar.Layout;
-import org.metaborg.sdf2table.grammar.LayoutConstraintAttribute;
-import org.metaborg.sdf2table.grammar.LexicalSymbol;
-import org.metaborg.sdf2table.grammar.LiteralType;
-import org.metaborg.sdf2table.grammar.NormGrammar;
-import org.metaborg.sdf2table.grammar.OptionalSymbol;
-import org.metaborg.sdf2table.grammar.Priority;
-import org.metaborg.sdf2table.grammar.Production;
-import org.metaborg.sdf2table.grammar.ProductionReference;
-import org.metaborg.sdf2table.grammar.SequenceSymbol;
-import org.metaborg.sdf2table.grammar.Sort;
-import org.metaborg.sdf2table.grammar.StartSymbol;
-import org.metaborg.sdf2table.grammar.Symbol;
-import org.metaborg.sdf2table.grammar.TermAttribute;
-import org.metaborg.sdf2table.grammar.UniqueProduction;
-import org.spoofax.interpreter.terms.IStrategoAppl;
-import org.spoofax.interpreter.terms.IStrategoList;
-import org.spoofax.interpreter.terms.IStrategoString;
-import org.spoofax.interpreter.terms.IStrategoTerm;
-import org.spoofax.interpreter.terms.ITermFactory;
+import org.metaborg.sdf2table.grammar.*;
+import org.spoofax.interpreter.terms.*;
 import org.spoofax.terms.StrategoAppl;
 import org.spoofax.terms.StrategoList;
 import org.spoofax.terms.StrategoString;
@@ -115,7 +79,7 @@ private void normalizeIndexedPriorities() {
             for(Integer arg : grammar.priorities().get(p)) {
                 if(arg != -1 && arg != Integer.MIN_VALUE && arg != Integer.MAX_VALUE) {
                     grammar.getIndexedPriorities().put(p, arg);
-                } 
+                }
             }
         }
 
@@ -389,6 +353,7 @@ private Symbol processSymbol(IStrategoTerm term) {
         Symbol symbol = null;
         String enquoted;
 
+        term = removeLabelFromSymbolTerm(term);
         symbol = grammar.getCacheSymbolsRead().get(term.toString());
 
         if(symbol != null) {
@@ -453,9 +418,6 @@ private Symbol processSymbol(IStrategoTerm term) {
                 case "FileStart":
                     symbol = new FileStartSymbol();
                     break;
-                case "Label":
-                    symbol = processSymbol(app.getSubterm(1));
-                    break;
                 default:
                     System.err.println("Unknown symbol type `" + app.getName() + "'. Is that normalized SDF3?");
                     return null;
@@ -465,14 +427,38 @@ private Symbol processSymbol(IStrategoTerm term) {
             return null;
         }
 
-        if(grammar != null && symbol != null) {
-            grammar.getCacheSymbolsRead().put(term.toString(), symbol);
-        }
-
+        grammar.getCacheSymbolsRead().put(term.toString(), symbol);
         grammar.getSymbols().add(symbol);
         return symbol;
     }
 
+    private IStrategoTerm removeLabelFromSymbolTerm(IStrategoTerm term) {
+        if(!(term instanceof StrategoAppl))
+            return term;
+        StrategoAppl app = (StrategoAppl) term;
+
+        if(app.getName().equals("Label")) {
+            return removeLabelFromSymbolTerm(app.getSubterm(1));
+        }
+
+        IStrategoTerm[] children = app.getAllSubterms();
+        boolean changed = false;
+        for(int i = 0; i < children.length; i++) {
+            IStrategoTerm newChild = removeLabelFromSymbolTerm(children[i]);
+            if(newChild != children[i]) {
+                changed = true;
+                children[i] = newChild;
+            }
+        }
+
+        // Only create a new Stratego term if the children have changed.
+        if(changed) {
+            return new StrategoAppl(app.getConstructor(), children, app.getAnnotations(), app.getStorageType());
+        }
+        // If there is no `Label` subterm, the full term is reused.
+        return term;
+    }
+
     public List<Symbol> processSymbolList(IStrategoTerm term) {
         List<Symbol> list = Lists.newLinkedList();