You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1095 lines
46 KiB
1095 lines
46 KiB
package com.fr.third.antlr; |
|
|
|
/* ANTLR Translator Generator |
|
* Project led by Terence Parr at http://www.cs.usfca.edu |
|
* Software rights: http://www.antlr.org/license.html |
|
* |
|
* $Id: //depot/code/org.antlr/release/antlr-2.7.7/antlr/LLkAnalyzer.java#2 $ |
|
*/ |
|
|
|
import com.fr.third.antlr.collections.impl.BitSet; |
|
import com.fr.third.antlr.collections.impl.Vector; |
|
|
|
/**A linear-approximate LL(k) grammar analyzer. |
|
* |
|
* All lookahead elements are sets of token types. |
|
* |
|
* @author Terence Parr, John Lilley |
|
* @see com.fr.third.antlr.Grammar |
|
* @see com.fr.third.antlr.Lookahead |
|
*/ |
|
public class LLkAnalyzer implements LLkGrammarAnalyzer { |
|
// Set "analyzerDebug" to true |
|
public boolean DEBUG_ANALYZER = false; |
|
private AlternativeBlock currentBlock; |
|
protected Tool tool = null; |
|
protected Grammar grammar = null; |
|
// True if analyzing a lexical grammar |
|
protected boolean lexicalAnalysis = false; |
|
// Used for formatting bit sets in default (Java) format |
|
CharFormatter charFormatter = new JavaCharFormatter(); |
|
|
|
/** Create an LLk analyzer bound to the given tool, which is used for
 *  reporting warnings, errors and panics during analysis.
 */
public LLkAnalyzer(Tool tool_) {
    this.tool = tool_;
}
|
|
|
/** Return true if someone used the '.' wildcard default idiom. |
|
* Either #(. children) or '.' as an alt by itself. |
|
*/ |
|
protected boolean altUsesWildcardDefault(Alternative alt) { |
|
AlternativeElement head = alt.head; |
|
// if element is #(. blah) then check to see if el is root |
|
if (head instanceof TreeElement && |
|
((TreeElement)head).root instanceof WildcardElement) { |
|
return true; |
|
} |
|
if (head instanceof WildcardElement && head.next instanceof BlockEndElement) { |
|
return true; |
|
} |
|
return false; |
|
} |
|
|
|
/**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
 * <p>
 * For every ordered pair of alternatives (i,j) with i&lt;j, search for the
 * smallest depth k in 1..grammar.maxk at which the linear-approximate
 * lookahead sets of alt i and alt j are disjoint.  As a side effect,
 * each alternative's lookahead cache (via getAltLookahead) and its
 * lookaheadDepth are filled in; other code (e.g. code generation)
 * depends on that cache even when the block is nondeterministic.
 * Ambiguity warnings are suppressed for several idioms (syn/sem preds,
 * wildcard-default alts, warnWhenFollowAmbig/generateAmbigWarnings
 * options, greedy loops with one empty alt).
 * @param blk the block to analyze; stored in currentBlock for the
 *            duration of the analysis and restored before returning
 * @return true if the block is deterministic
 */
public boolean deterministic(AlternativeBlock blk) {
    /** The lookahead depth for this decision */
    int k = 1; // start at k=1
    if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
    boolean det = true;
    int nalts = blk.alternatives.size();
    // save/restore: look() and nested analysis also manipulate currentBlock
    AlternativeBlock saveCurrentBlock = currentBlock;
    Alternative wildcardAlt = null;
    currentBlock = blk;

    /* don't allow nongreedy (...) blocks */
    if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
        tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
    }

    // SPECIAL CASE: only one alternative.  We don't need to check the
    // determinism, but other code expects the lookahead cache to be
    // set for the single alt.
    if (nalts == 1) {
        AlternativeElement e = blk.getAlternativeAt(0).head;
        currentBlock.alti = 0;
        blk.getAlternativeAt(0).cache[1] = e.look(1);
        blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
        currentBlock = saveCurrentBlock;
        return true; // always deterministic for one alt
    }

    // outer:
    for (int i = 0; i < nalts - 1; i++) {
        currentBlock.alti = i;
        currentBlock.analysisAlt = i; // which alt are we analyzing?
        currentBlock.altj = i + 1; // reset this alt.  Haven't computed yet,
        // but we need the alt number.
        // inner:
        // compare against other alternatives with lookahead depth k
        for (int j = i + 1; j < nalts; j++) {
            currentBlock.altj = j;
            if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
            currentBlock.analysisAlt = j; // which alt are we analyzing?
            k = 1; // always attempt minimum lookahead possible.

            // check to see if there is a lookahead depth that distinguishes
            // between alternatives i and j.
            // r[k] records the intersection found at each depth so the
            // error handler can report exactly where the ambiguity lies.
            Lookahead[] r = new Lookahead[grammar.maxk + 1];
            boolean haveAmbiguity;
            do {
                haveAmbiguity = false;
                if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
                Lookahead p,q;
                p = getAltLookahead(blk, i, k);
                q = getAltLookahead(blk, j, k);

                // compare LOOK(alt i) with LOOK(alt j).  Is there an intersection?
                // Lookahead must be disjoint.
                if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
                if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
                // r[i] = p.fset.and(q.fset);
                r[k] = p.intersection(q);
                if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
                if (!r[k].nil()) {
                    // still ambiguous at this depth; try one deeper
                    haveAmbiguity = true;
                    k++;
                }
                // go until no more lookahead to use or no intersection
            } while (haveAmbiguity && k <= grammar.maxk);

            Alternative ai = blk.getAlternativeAt(i);
            Alternative aj = blk.getAlternativeAt(j);
            if (haveAmbiguity) {
                det = false;
                ai.lookaheadDepth = NONDETERMINISTIC;
                aj.lookaheadDepth = NONDETERMINISTIC;

                /* if ith alt starts with a syntactic predicate, computing the
                 * lookahead is still done for code generation, but messages
                 * should not be generated when comparing against alt j.
                 * Alternatives with syn preds that are unnecessary do
                 * not result in syn pred try-blocks.
                 */
                if (ai.synPred != null) {
                    if (DEBUG_ANALYZER) {
                        System.out.println("alt " + i + " has a syn pred");
                    }
                    // The alt with the (...)=> block is nondeterministic for sure.
                    // If the (...)=> conflicts with alt j, j is nondeterministic.
                    // This prevents alt j from being in any switch statements.
                    // move on to next alternative=>no possible ambiguity!
                    // continue inner;
                }

                /* if ith alt starts with a semantic predicate, computing the
                 * lookahead is still done for code generation, but messages
                 * should not be generated when comparing against alt j.
                 */
                else if (ai.semPred != null) {
                    if (DEBUG_ANALYZER) {
                        System.out.println("alt " + i + " has a sem pred");
                    }
                }

                /* if jth alt is exactly the wildcard or wildcard root of tree,
                 * then remove elements from alt i lookahead from alt j's lookahead.
                 * Don't do an ambiguity warning.
                 */
                else if (altUsesWildcardDefault(aj)) {
                    // System.out.println("removing pred sets");
                    // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
                    wildcardAlt = aj;
                }

                /* If the user specified warnWhenFollowAmbig=false, then we
                 * can turn off this warning IFF one of the alts is empty;
                 * that is, it points immediately at the end block.
                 */
                else if (!blk.warnWhenFollowAmbig &&
                    (ai.head instanceof BlockEndElement ||
                    aj.head instanceof BlockEndElement)) {
                    // System.out.println("ai.head pts to "+ai.head.getClass());
                    // System.out.println("aj.head pts to "+aj.head.getClass());
                }

                /* If they have the generateAmbigWarnings option off for the block
                 * then don't generate a warning.
                 */
                else if (!blk.generateAmbigWarnings) {
                }

                /* If greedy=true and *one* empty alt shut off warning. */
                else if (blk.greedySet && blk.greedy &&
                    ((ai.head instanceof BlockEndElement &&
                    !(aj.head instanceof BlockEndElement)) ||
                    (aj.head instanceof BlockEndElement &&
                    !(ai.head instanceof BlockEndElement)))) {
                    // System.out.println("greedy set to true; one alt empty");
                }

                /* We have no choice, but to report a nondetermism */
                else {
                    tool.errorHandler.warnAltAmbiguity(
                        grammar,
                        blk, // the block
                        lexicalAnalysis, // true if lexical
                        grammar.maxk, // depth of ambiguity
                        r, // set of linear ambiguities
                        i, // first ambiguous alternative
                        j // second ambiguous alternative
                    );
                }
            }
            else {
                // a lookahead depth, k, was found where i and j do not conflict
                ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
                aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
            }
        }
    }

    // finished with block.

    // If had wildcard default clause idiom, remove competing lookahead
    /*
    if ( wildcardAlt!=null ) {
        removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
    }
    */

    currentBlock = saveCurrentBlock;
    return det;
}
|
|
|
/**Is (...)+ block LL(1)? Fill in alternative cache for this block. |
|
* @return true if the block is deterministic |
|
*/ |
|
public boolean deterministic(OneOrMoreBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")"); |
|
AlternativeBlock saveCurrentBlock = currentBlock; |
|
currentBlock = blk; |
|
boolean blkOk = deterministic((AlternativeBlock)blk); |
|
// block has been checked, now check that what follows does not conflict |
|
// with the lookahead of the (...)+ block. |
|
boolean det = deterministicImpliedPath(blk); |
|
currentBlock = saveCurrentBlock; |
|
return det && blkOk; |
|
} |
|
|
|
/**Is (...)* block LL(1)? Fill in alternative cache for this block. |
|
* @return true if the block is deterministic |
|
*/ |
|
public boolean deterministic(ZeroOrMoreBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")"); |
|
AlternativeBlock saveCurrentBlock = currentBlock; |
|
currentBlock = blk; |
|
boolean blkOk = deterministic((AlternativeBlock)blk); |
|
// block has been checked, now check that what follows does not conflict |
|
// with the lookahead of the (...)* block. |
|
boolean det = deterministicImpliedPath(blk); |
|
currentBlock = saveCurrentBlock; |
|
return det && blkOk; |
|
} |
|
|
|
/**Is this (...)* or (...)+ block LL(k)?
 * <p>
 * Compares each alternative of the loop against the block's implied
 * exit path (the lookahead of what follows the block), searching for
 * a depth k in 1..grammar.maxk at which the two are disjoint.  Fills
 * in blk.exitCache[k] and blk.exitLookaheadDepth as a side effect.
 * Several idioms suppress the ambiguity warning (warnWhenFollowAmbig,
 * generateAmbigWarnings, explicit greedy/nongreedy settings).
 * @param blk the loop block whose exit branch is being checked
 * @return true if the block is deterministic
 */
public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
    /** The lookahead depth for this decision considering implied exit path */
    int k;
    boolean det = true;
    Vector alts = blk.getAlternatives();
    int nalts = alts.size();
    currentBlock.altj = -1; // comparing against implicit optional/exit alt

    if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
    for (int i = 0; i < nalts; i++) { // check follow against all alts
        Alternative alt = blk.getAlternativeAt(i);

        if (alt.head instanceof BlockEndElement) {
            tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
        }

        k = 1; // assume each alt is LL(1) with exit branch
        // check to see if there is a lookahead depth that distinguishes
        // between alternative i and the exit branch.
        // r[k] records the intersection at each depth for error reporting.
        Lookahead[] r = new Lookahead[grammar.maxk + 1];
        boolean haveAmbiguity;
        do {
            haveAmbiguity = false;
            if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
            Lookahead p;
            // FOLLOW of the whole loop block = lookahead of the exit branch
            Lookahead follow = blk.next.look(k);
            blk.exitCache[k] = follow;
            currentBlock.alti = i;
            p = getAltLookahead(blk, i, k);

            if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
            if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
            //r[k] = follow.fset.and(p.fset);
            r[k] = follow.intersection(p);
            if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
            if (!r[k].nil()) {
                // still ambiguous at this depth; try one deeper
                haveAmbiguity = true;
                k++;
            }
            // go until no more lookahead to use or no intersection
        } while (haveAmbiguity && k <= grammar.maxk);

        if (haveAmbiguity) {
            det = false;
            alt.lookaheadDepth = NONDETERMINISTIC;
            blk.exitLookaheadDepth = NONDETERMINISTIC;
            Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);

            /* If the user specified warnWhenFollowAmbig=false, then we
             * can turn off this warning.
             */
            if (!blk.warnWhenFollowAmbig) {
            }

            /* If they have the generateAmbigWarnings option off for the block
             * then don't generate a warning.
             */
            else if (!blk.generateAmbigWarnings) {
            }

            /* If greedy=true and alt not empty, shut off warning */
            else if (blk.greedy == true && blk.greedySet &&
                !(ambigAlt.head instanceof BlockEndElement)) {
                if (DEBUG_ANALYZER) System.out.println("greedy loop");
            }

            /* If greedy=false then shut off warning...will have
             * to add "if FOLLOW break"
             * block during code gen to compensate for removal of warning.
             */
            else if (blk.greedy == false &&
                !(ambigAlt.head instanceof BlockEndElement)) {
                if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
                // if FOLLOW not single k-string (|set[k]| can
                // be > 1 actually) then must warn them that
                // loop may terminate incorrectly.
                // For example, ('a'..'d')+ ("ad"|"cb")
                if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
                    tool.warning(new String[]{
                        "nongreedy block may exit incorrectly due",
                        "\tto limitations of linear approximate lookahead (first k-1 sets",
                        "\tin lookahead not singleton)."},
                        grammar.getFilename(), blk.getLine(), blk.getColumn());
                }
            }

            // no choice but to generate a warning
            else {
                tool.errorHandler.warnAltExitAmbiguity(
                    grammar,
                    blk, // the block
                    lexicalAnalysis, // true if lexical
                    grammar.maxk, // depth of ambiguity
                    r, // set of linear ambiguities
                    i // ambiguous alternative
                );
            }
        }
        else {
            // exit branch distinguishable from alt i at depth k; record it
            alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
            blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
        }
    }
    return det;
}
|
|
|
/**Compute the lookahead set of whatever follows references to
 * the rule associated with the FOLLOW block.
 * <p>
 * Results are cached per depth k on the RuleEndElement.  Because rules
 * may reference each other, the computation can be cyclic: end.lock[k]
 * marks a computation in progress, and hitting a locked end node returns
 * a Lookahead carrying a non-null {@code cycle} (the rule name).  Cached
 * entries that still carry a cycle are completed/combined lazily on
 * later calls.
 * @param k   the lookahead depth
 * @param end the end node of the rule whose FOLLOW is wanted
 * @return the FOLLOW set at depth k (possibly marked with a cycle ref)
 */
public Lookahead FOLLOW(int k, RuleEndElement end) {
    // what rule are we trying to compute FOLLOW of?
    RuleBlock rb = (RuleBlock)end.block;
    // rule name is different in lexer
    String rule;
    if (lexicalAnalysis) {
        rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
    }
    else {
        rule = rb.getRuleName();
    }

    if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");

    // are we in the midst of computing this FOLLOW already?
    if (end.lock[k]) {
        if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
        // return an empty lookahead marked with a cycle to this rule
        return new Lookahead(rule);
    }

    // Check to see if there is cached value
    if (end.cache[k] != null) {
        if (DEBUG_ANALYZER) {
            System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
        }
        // if the cache is a complete computation then simply return entry
        if (end.cache[k].cycle == null) {
            return (Lookahead)end.cache[k].clone();
        }
        // A cache entry exists, but it is a reference to a cyclic computation.
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
        RuleEndElement re = rs.getBlock().endNode;
        // The other entry may not exist because it is still being
        // computed when this cycle cache entry was found here.
        if (re.cache[k] == null) {
            // return the cycle...that's all we can do at the moment.
            return (Lookahead)end.cache[k].clone();
        }
        else {
            if (DEBUG_ANALYZER) {
                System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar));
            }
            // combine results from other rule's FOLLOW
            if ( re.cache[k].cycle==null ) {
                // current rule depends on another rule's FOLLOW and
                // it is complete with no cycle; just kill our cycle and
                // combine full result from other rule's FOLLOW
                end.cache[k].combineWith(re.cache[k]);
                end.cache[k].cycle = null; // kill cycle as we're complete
            }
            else {
                // the FOLLOW cache for other rule has a cycle also.
                // Here is where we bubble up a cycle.  We better recursively
                // wipe out cycles (partial computations).  I'm a little nervous
                // that we might leave a cycle here, however.
                Lookahead refFOLLOW = FOLLOW(k, re);
                end.cache[k].combineWith( refFOLLOW );
                // all cycles should be gone, but if not, record ref to cycle
                end.cache[k].cycle = refFOLLOW.cycle;
            }
            if (DEBUG_ANALYZER) {
                System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar));
            }
            // Return the updated cache entry associated
            // with the cycle reference.
            return (Lookahead)end.cache[k].clone();
        }
    }

    end.lock[k] = true; // prevent FOLLOW computation cycles

    Lookahead p = new Lookahead();

    RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);

    // Walk list of references to this rule to compute FOLLOW
    for (int i = 0; i < rs.numReferences(); i++) {
        RuleRefElement rr = rs.getReference(i);
        if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
        Lookahead q = rr.next.look(k);
        if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
        /* If there is a cycle then if the cycle is to the rule for
         * this end block, you have a cycle to yourself.  Remove the
         * cycle indication--the lookahead is complete.
         */
        if (q.cycle != null && q.cycle.equals(rule)) {
            q.cycle = null; // don't want cycle to yourself!
        }
        // add the lookahead into the current FOLLOW computation set
        p.combineWith(q);
        if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
    }

    end.lock[k] = false; // we're not doing FOLLOW anymore

    // if no rules follow this, it can be a start symbol or called by a start sym.
    // set the follow to be end of file.
    if (p.fset.nil() && p.cycle == null) {
        if (grammar instanceof TreeWalkerGrammar) {
            // Tree grammars don't see EOF, they see end of sibling list or
            // "NULL TREE LOOKAHEAD".
            p.fset.add(Token.NULL_TREE_LOOKAHEAD);
        }
        else if (grammar instanceof LexerGrammar) {
            // Lexical grammars use Epsilon to indicate that the end of rule has been hit
            // EOF would be misleading; any character can follow a token rule not just EOF
            // as in a grammar (where a start symbol is followed by EOF).  There is no
            // sequence info in a lexer between tokens to indicate what is the last token
            // to be seen.
            // p.fset.add(EPSILON_TYPE);
            p.setEpsilon();
        }
        else {
            p.fset.add(Token.EOF_TYPE);
        }
    }

    // Cache the result of the FOLLOW computation
    if (DEBUG_ANALYZER) {
        System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
    }
    end.cache[k] = (Lookahead)p.clone();

    return p;
}
|
|
|
private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) { |
|
Lookahead p; |
|
Alternative a = blk.getAlternativeAt(alt); |
|
AlternativeElement e = a.head; |
|
//System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length); |
|
if (a.cache[k] == null) { |
|
p = e.look(k); |
|
a.cache[k] = p; |
|
} |
|
else { |
|
p = a.cache[k]; |
|
} |
|
return p; |
|
} |
|
|
|
/**Actions are ignored */ |
|
public Lookahead look(int k, ActionElement action) { |
|
if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")"); |
|
return action.next.look(k); |
|
} |
|
|
|
/**Combine the lookahead computed for each alternative.
 * <p>
 * The lookahead of an alternative block is the union of the lookahead
 * of all its alternatives.  If the block is negated (~(...)) and the
 * subrule is invertible, the combined set is complemented at depth 1:
 * against the declared character vocabulary in a lexer, or against the
 * user token range in a parser.
 * @param k   the lookahead depth
 * @param blk the alternative block
 * @return the combined (possibly inverted) lookahead set
 */
public Lookahead look(int k, AlternativeBlock blk) {
    if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
    // save/restore: analysis of nested blocks also uses currentBlock
    AlternativeBlock saveCurrentBlock = currentBlock;
    currentBlock = blk;
    Lookahead p = new Lookahead();
    for (int i = 0; i < blk.alternatives.size(); i++) {
        if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
        // must set analysis alt
        currentBlock.analysisAlt = i;
        Alternative alt = blk.getAlternativeAt(i);
        AlternativeElement elem = alt.head;
        if (DEBUG_ANALYZER) {
            if (alt.head == alt.tail) {
                System.out.println("alt " + i + " is empty");
            }
        }
        // union this alternative's lookahead into the result
        Lookahead q = elem.look(k);
        p.combineWith(q);
    }
    if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
        // Invert the lookahead set
        if (lexicalAnalysis) {
            // complement against the lexer's declared character vocabulary
            BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
            int[] elems = p.fset.toArray();
            for (int j = 0; j < elems.length; j++) {
                b.remove(elems[j]);
            }
            p.fset = b;
        }
        else {
            // complement against the user token type range
            p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
        }
    }
    currentBlock = saveCurrentBlock;
    return p;
}
|
|
|
/**Compute what follows this place-holder node and possibly
 * what begins the associated loop unless the
 * node is locked.
 * <p>
 * if we hit the end of a loop, we have to include
 * what tokens can begin the loop as well.  If the start
 * node is locked, then we simply found an empty path
 * through this subrule while analyzing it.  If the
 * start node is not locked, then this node was hit
 * during a FOLLOW operation and the FIRST of this
 * block must be included in that lookahead computation.
 * @param k   the lookahead depth
 * @param end the end node of the enclosing block
 * @return the lookahead set visible past the end of the block
 */
public Lookahead look(int k, BlockEndElement end) {
    if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
    if (end.lock[k]) {
        // computation in progress => the tokens we would have
        // computed (had we not been locked) will be included
        // in the set by that computation with the lock on this
        // node.
        return new Lookahead();
    }

    Lookahead p;

    /* Hitting the end of a loop means you can see what begins the loop */
    if (end.block instanceof ZeroOrMoreBlock ||
        end.block instanceof OneOrMoreBlock) {
        // compute what can start the block,
        // but lock end node so we don't do it twice in same
        // computation.
        end.lock[k] = true;
        p = look(k, end.block);
        end.lock[k] = false;
    }
    else {
        p = new Lookahead();
    }

    /* Tree blocks do not have any follow because they are children
     * of what surrounds them.  For example, A #(B C) D results in
     * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
     * indicates that nothing can follow the last node of tree #(B C)
     */
    if (end.block instanceof TreeElement) {
        p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
    }

    /* Syntactic predicates such as ( (A)? )=> have no follow per se.
     * We cannot accurately say what would be matched following a
     * syntactic predicate (you MIGHT be ok if you said it was whatever
     * followed the alternative predicted by the predicate).  Hence,
     * (like end-of-token) we return Epsilon to indicate "unknown
     * lookahead."
     */
    else if (end.block instanceof SynPredBlock) {
        p.setEpsilon();
    }

    // compute what can follow the block
    else {
        Lookahead q = end.block.next.look(k);
        p.combineWith(q);
    }

    return p;
}
|
|
|
/**Return this char as the lookahead if k=1. |
|
* <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!! |
|
* <p> |
|
* If the atom has the <tt>not</tt> flag on, then |
|
* create the set complement of the tokenType |
|
* which is the set of all characters referenced |
|
* in the grammar with this char turned off. |
|
* Also remove characters from the set that |
|
* are currently allocated for predicting |
|
* previous alternatives. This avoids ambiguity |
|
* messages and is more properly what is meant. |
|
* ( 'a' | ~'a' ) implies that the ~'a' is the |
|
* "else" clause. |
|
* <p> |
|
* NOTE: we do <b>NOT</b> include exit path in |
|
* the exclusion set. E.g., |
|
* ( 'a' | ~'a' )* 'b' |
|
* should exit upon seeing a 'b' during the loop. |
|
*/ |
|
public Lookahead look(int k, CharLiteralElement atom) { |
|
if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")"); |
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return atom.next.look(k - 1); |
|
} |
|
if (lexicalAnalysis) { |
|
if (atom.not) { |
|
BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone(); |
|
if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString()); |
|
// remove stuff predicted by preceding alts and follow of block |
|
removeCompetingPredictionSets(b, atom); |
|
if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString()); |
|
// now remove element that is stated not to be in the set |
|
b.clear(atom.getType()); |
|
return new Lookahead(b); |
|
} |
|
else { |
|
return Lookahead.of(atom.getType()); |
|
} |
|
} |
|
else { |
|
// Should have been avoided by MakeGrammar |
|
tool.panic("Character literal reference found in parser"); |
|
// ... so we make the compiler happy |
|
return Lookahead.of(atom.getType()); |
|
} |
|
} |
|
|
|
public Lookahead look(int k, CharRangeElement r) { |
|
if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")"); |
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return r.next.look(k - 1); |
|
} |
|
BitSet p = BitSet.of(r.begin); |
|
for (int i = r.begin + 1; i <= r.end; i++) { |
|
p.add(i); |
|
} |
|
return new Lookahead(p); |
|
} |
|
|
|
public Lookahead look(int k, GrammarAtom atom) { |
|
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])"); |
|
|
|
if (lexicalAnalysis) { |
|
// MakeGrammar should have created a rule reference instead |
|
tool.panic("token reference found in lexer"); |
|
} |
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return atom.next.look(k - 1); |
|
} |
|
Lookahead l = Lookahead.of(atom.getType()); |
|
if (atom.not) { |
|
// Invert the lookahead set against the token vocabulary |
|
int maxToken = grammar.tokenManager.maxTokenType(); |
|
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); |
|
// remove stuff predicted by preceding alts and follow of block |
|
removeCompetingPredictionSets(l.fset, atom); |
|
} |
|
return l; |
|
} |
|
|
|
/**The lookahead of a (...)+ block is the combined lookahead of |
|
* all alternatives and, if an empty path is found, the lookahead |
|
* of what follows the block. |
|
*/ |
|
public Lookahead look(int k, OneOrMoreBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")"); |
|
Lookahead p = look(k, (AlternativeBlock)blk); |
|
return p; |
|
} |
|
|
|
/**Combine the lookahead computed for each alternative. |
|
* Lock the node so that no other computation may come back |
|
* on itself--infinite loop. This also implies infinite left-recursion |
|
* in the grammar (or an error in this algorithm ;)). |
|
*/ |
|
public Lookahead look(int k, RuleBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")"); |
|
Lookahead p = look(k, (AlternativeBlock)blk); |
|
return p; |
|
} |
|
|
|
/**If not locked or noFOLLOW set, compute FOLLOW of a rule. |
|
* <p> |
|
* TJP says 8/12/99: not true anymore: |
|
* Lexical rules never compute follow. They set epsilon and |
|
* the code generator gens code to check for any character. |
|
* The code generator must remove the tokens used to predict |
|
* any previous alts in the same block. |
|
* <p> |
|
* When the last node of a rule is reached and noFOLLOW, |
|
* it implies that a "local" FOLLOW will be computed |
|
* after this call. I.e., |
|
* <pre> |
|
* a : b A; |
|
* b : B | ; |
|
* c : b C; |
|
* </pre> |
|
* Here, when computing the look of rule b from rule a, |
|
* we want only {B,EPSILON_TYPE} so that look(b A) will |
|
* be {B,A} not {B,A,C}. |
|
* <p> |
|
* if the end block is not locked and the FOLLOW is |
|
* wanted, the algorithm must compute the lookahead |
|
* of what follows references to this rule. If |
|
* end block is locked, FOLLOW will return an empty set |
|
* with a cycle to the rule associated with this end block. |
|
*/ |
|
public Lookahead look(int k, RuleEndElement end) { |
|
if (DEBUG_ANALYZER) |
|
System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" + |
|
end.noFOLLOW + "; lock is " + end.lock[k]); |
|
if (/*lexicalAnalysis ||*/ end.noFOLLOW) { |
|
Lookahead p = new Lookahead(); |
|
p.setEpsilon(); |
|
p.epsilonDepth = BitSet.of(k); |
|
return p; |
|
} |
|
Lookahead p = FOLLOW(k, end); |
|
return p; |
|
} |
|
|
|
/**Compute the lookahead contributed by a rule reference.
 *
 * <p>
 * When computing ruleref lookahead, we don't want the FOLLOW
 * computation done if an empty path exists for the rule.
 * The FOLLOW is too loose of a set...we want only to
 * include the "local" FOLLOW or what can follow this
 * particular ref to the node.  In other words, we use
 * context information to reduce the complexity of the
 * analysis and strengthen the parser.
 *
 * The noFOLLOW flag is used as a means of restricting
 * the FOLLOW to a "local" FOLLOW.  This variable is
 * orthogonal to the <tt>lock</tt> variable that prevents
 * infinite recursion.  noFOLLOW does not care about what k is.
 * @param k  the lookahead depth
 * @param rr the rule reference element
 * @return the lookahead set contributed by the referenced rule,
 *         extended with local FOLLOW where the rule can match empty
 */
public Lookahead look(int k, RuleRefElement rr) {
    if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
    RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
    if (rs == null || !rs.defined) {
        tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
        return new Lookahead();
    }
    RuleBlock rb = rs.getBlock();
    RuleEndElement end = rb.endNode;
    // temporarily force the rule's end node to report epsilon rather than
    // computing the (too loose) global FOLLOW; restored below
    boolean saveEnd = end.noFOLLOW;
    end.noFOLLOW = true;
    // go off to the rule and get the lookahead (w/o FOLLOW)
    Lookahead p = look(k, rr.targetRule);
    if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
    // restore state of end block
    end.noFOLLOW = saveEnd;

    // check for infinite recursion.  If a cycle is returned: trouble!
    if (p.cycle != null) {
        tool.error("infinite recursion to rule " + p.cycle + " from rule " +
            rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
    }

    // is the local FOLLOW required?
    if (p.containsEpsilon()) {
        if (DEBUG_ANALYZER)
            System.out.println("rule ref to " +
                rr.targetRule + " has eps, depth: " + p.epsilonDepth);

        // remove epsilon
        p.resetEpsilon();
        // fset.clear(EPSILON_TYPE);

        // for each lookahead depth that saw epsilon
        int[] depths = p.epsilonDepth.toArray();
        p.epsilonDepth = null; // clear all epsilon stuff
        for (int i = 0; i < depths.length; i++) {
            // NOTE(review): rk algebraically simplifies to depths[i]; kept
            // as written — see comments in Lookahead for the intent.
            int rk = k - (k - depths[i]);
            Lookahead q = rr.next.look(rk); // see comments in Lookahead
            p.combineWith(q);
        }
        // note: any of these look() computations for local follow can
        // set EPSILON in the set again if the end of this rule is found.
    }

    return p;
}
|
|
|
public Lookahead look(int k, StringLiteralElement atom) { |
|
if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")"); |
|
if (lexicalAnalysis) { |
|
// need more lookahead than string can provide? |
|
if (k > atom.processedAtomText.length()) { |
|
return atom.next.look(k - atom.processedAtomText.length()); |
|
} |
|
else { |
|
// get char at lookahead depth k, from the processed literal text |
|
return Lookahead.of(atom.processedAtomText.charAt(k - 1)); |
|
} |
|
} |
|
else { |
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return atom.next.look(k - 1); |
|
} |
|
Lookahead l = Lookahead.of(atom.getType()); |
|
if (atom.not) { |
|
// Invert the lookahead set against the token vocabulary |
|
int maxToken = grammar.tokenManager.maxTokenType(); |
|
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); |
|
} |
|
return l; |
|
} |
|
} |
|
|
|
/**The lookahead of a (...)=> block is the lookahead of |
|
* what follows the block. By definition, the syntactic |
|
* predicate block defies static analysis (you want to try it |
|
* out at run-time). The LOOK of (a)=>A B is A for LL(1) |
|
* ### is this even called? |
|
*/ |
|
public Lookahead look(int k, SynPredBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")"); |
|
return blk.next.look(k); |
|
} |
|
|
|
public Lookahead look(int k, TokenRangeElement r) { |
|
if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")"); |
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return r.next.look(k - 1); |
|
} |
|
BitSet p = BitSet.of(r.begin); |
|
for (int i = r.begin + 1; i <= r.end; i++) { |
|
p.add(i); |
|
} |
|
return new Lookahead(p); |
|
} |
|
|
|
public Lookahead look(int k, TreeElement t) { |
|
if (DEBUG_ANALYZER) |
|
System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])"); |
|
if (k > 1) { |
|
return t.next.look(k - 1); |
|
} |
|
Lookahead l = null; |
|
if (t.root instanceof WildcardElement) { |
|
l = t.root.look(1); // compute FIRST set minus previous rows |
|
} |
|
else { |
|
l = Lookahead.of(t.root.getType()); |
|
if (t.root.not) { |
|
// Invert the lookahead set against the token vocabulary |
|
int maxToken = grammar.tokenManager.maxTokenType(); |
|
l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken); |
|
} |
|
} |
|
return l; |
|
} |
|
|
|
public Lookahead look(int k, WildcardElement wc) { |
|
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")"); |
|
|
|
// Skip until analysis hits k==1 |
|
if (k > 1) { |
|
return wc.next.look(k - 1); |
|
} |
|
|
|
BitSet b; |
|
if (lexicalAnalysis) { |
|
// Copy the character vocabulary |
|
b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone(); |
|
} |
|
else { |
|
b = new BitSet(1); |
|
// Invert the lookahead set against the token vocabulary |
|
int maxToken = grammar.tokenManager.maxTokenType(); |
|
b.notInPlace(Token.MIN_USER_TYPE, maxToken); |
|
if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b); |
|
} |
|
|
|
// Remove prediction sets from competing alternatives |
|
// removeCompetingPredictionSets(b, wc); |
|
|
|
return new Lookahead(b); |
|
} |
|
|
|
/** The (...)* element is the combined lookahead of the alternatives and what can |
|
* follow the loop. |
|
*/ |
|
public Lookahead look(int k, ZeroOrMoreBlock blk) { |
|
if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")"); |
|
Lookahead p = look(k, (AlternativeBlock)blk); |
|
Lookahead q = blk.next.look(k); |
|
p.combineWith(q); |
|
return p; |
|
} |
|
|
|
/**Compute the combined lookahead for all productions of a rule. |
|
* If the lookahead returns with epsilon, at least one epsilon |
|
* path exists (one that consumes no tokens). The noFOLLOW |
|
* flag being set for this endruleblk, indicates that the |
|
* a rule ref invoked this rule. |
|
* |
|
* Currently only look(RuleRef) calls this. There is no need |
|
* for the code generator to call this. |
|
*/ |
|
public Lookahead look(int k, String rule) { |
|
if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")"); |
|
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule); |
|
RuleBlock rb = rs.getBlock(); |
|
|
|
if (rb.lock[k]) { |
|
if (DEBUG_ANALYZER) |
|
System.out.println("infinite recursion to rule " + rb.getRuleName()); |
|
return new Lookahead(rule); |
|
} |
|
|
|
// have we computed it before? |
|
if (rb.cache[k] != null) { |
|
if (DEBUG_ANALYZER) { |
|
System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " + |
|
rb.cache[k].toString(",", charFormatter, grammar)); |
|
} |
|
return (Lookahead)rb.cache[k].clone(); |
|
} |
|
|
|
rb.lock[k] = true; |
|
Lookahead p = look(k, (RuleBlock)rb); |
|
rb.lock[k] = false; |
|
|
|
// cache results |
|
rb.cache[k] = (Lookahead)p.clone(); |
|
if (DEBUG_ANALYZER) { |
|
System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " + |
|
rb.cache[k].toString(",", charFormatter, grammar)); |
|
} |
|
return p; |
|
} |
|
|
|
/** If the first k-1 sets are singleton sets, the appoximate |
|
* lookahead analysis is equivalent to full lookahead analysis. |
|
*/ |
|
public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) { |
|
// first k-1 sets degree 1? |
|
for (int i = 1; i <= k - 1; i++) { |
|
BitSet look = bset[i].fset; |
|
if (look.degree() > 1) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
|
|
/** Remove the prediction sets from preceding alternatives |
|
* and follow set, but *only* if this element is the first element |
|
* of the alternative. The class members currenBlock and |
|
* currentBlock.analysisAlt must be set correctly. |
|
* @param b The prediction bitset to be modified |
|
* @el The element of interest |
|
*/ |
|
private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) { |
|
// Only do this if the element is the first element of the alt, |
|
// because we are making an implicit assumption that k==1. |
|
GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head; |
|
// if element is #(. blah) then check to see if el is root |
|
if (head instanceof TreeElement) { |
|
if (((TreeElement)head).root != el) { |
|
return; |
|
} |
|
} |
|
else if (el != head) { |
|
return; |
|
} |
|
for (int i = 0; i < currentBlock.analysisAlt; i++) { |
|
AlternativeElement e = currentBlock.getAlternativeAt(i).head; |
|
b.subtractInPlace(e.look(1).fset); |
|
} |
|
} |
|
|
|
/** Remove the prediction sets from preceding alternatives |
|
* The class members currenBlock must be set correctly. |
|
* Remove prediction sets from 1..k. |
|
* @param look The prediction lookahead to be modified |
|
* @el The element of interest |
|
* @k How deep into lookahead to modify |
|
*/ |
|
private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) { |
|
for (int d = 1; d <= k; d++) { |
|
for (int i = 0; i < currentBlock.analysisAlt; i++) { |
|
AlternativeElement e = currentBlock.getAlternativeAt(i).head; |
|
look[d].fset.subtractInPlace(e.look(d).fset); |
|
} |
|
} |
|
} |
|
|
|
/** reset the analyzer so it looks like a new one */ |
|
private void reset() { |
|
grammar = null; |
|
DEBUG_ANALYZER = false; |
|
currentBlock = null; |
|
lexicalAnalysis = false; |
|
} |
|
|
|
/** Set the grammar for the analyzer */ |
|
public void setGrammar(Grammar g) { |
|
if (grammar != null) { |
|
reset(); |
|
} |
|
grammar = g; |
|
|
|
// Is this lexical? |
|
lexicalAnalysis = (grammar instanceof LexerGrammar); |
|
DEBUG_ANALYZER = grammar.analyzerDebug; |
|
} |
|
|
|
public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) { |
|
if ( |
|
blk instanceof ZeroOrMoreBlock || |
|
blk instanceof OneOrMoreBlock || |
|
blk instanceof SynPredBlock |
|
) { |
|
return false; |
|
} |
|
// Cannot invert an empty subrule |
|
if (blk.alternatives.size() == 0) { |
|
return false; |
|
} |
|
// The block must only contain alternatives with a single element, |
|
// where each element is a char, token, char range, or token range. |
|
for (int i = 0; i < blk.alternatives.size(); i++) { |
|
Alternative alt = blk.getAlternativeAt(i); |
|
// Cannot have anything interesting in the alternative ... |
|
if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) { |
|
return false; |
|
} |
|
// ... and there must be one simple element |
|
AlternativeElement elt = alt.head; |
|
if ( |
|
!( |
|
elt instanceof CharLiteralElement || |
|
elt instanceof TokenRefElement || |
|
elt instanceof CharRangeElement || |
|
elt instanceof TokenRangeElement || |
|
(elt instanceof StringLiteralElement && !forLexer) |
|
) || |
|
!(elt.next instanceof BlockEndElement) || |
|
elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE |
|
) { |
|
return false; |
|
} |
|
} |
|
return true; |
|
} |
|
}
|
|
|