package com.fr.third.antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.cs.usfca.edu
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id: //depot/code/org.antlr/release/antlr-2.7.7/antlr/LLkAnalyzer.java#2 $
 */

import com.fr.third.antlr.collections.impl.BitSet;
import com.fr.third.antlr.collections.impl.Vector;

/** A linear-approximate LL(k) grammar analyzer.
 *
 *  All lookahead elements are sets of token types.
 *
 *  @author Terence Parr, John Lilley
 *  @see com.fr.third.antlr.Grammar
 *  @see com.fr.third.antlr.Lookahead
 */
public class LLkAnalyzer implements LLkGrammarAnalyzer {
    // Set "analyzerDebug" to true
    public boolean DEBUG_ANALYZER = false;
    private AlternativeBlock currentBlock;
    protected Tool tool = null;
    protected Grammar grammar = null;
    // True if analyzing a lexical grammar
    protected boolean lexicalAnalysis = false;
    // Used for formatting bit sets in default (Java) format
    CharFormatter charFormatter = new JavaCharFormatter();

    /** Create an LLk analyzer */
    public LLkAnalyzer(Tool tool_) {
        tool = tool_;
    }
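
    // Illustrative usage sketch (not part of the original ANTLR source; the calls
    // below are normally made by ANTLR's Tool and code generator, shown here only
    // to make the driving sequence concrete):
    //
    //     LLkAnalyzer analyzer = new LLkAnalyzer(tool);   // 'tool' is the running antlr Tool
    //     analyzer.setGrammar(grammar);                    // parser, lexer, or tree-walker grammar
    //     boolean ok = analyzer.deterministic(someBlock);  // fills the block's lookahead cache
    //     Lookahead first2 = analyzer.look(2, "myRule");   // FIRST at depth 2 of a rule, used by codegen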

    /** Return true if someone used the '.' wildcard default idiom.
     *  Either #(. children) or '.' as an alt by itself.
     */
    protected boolean altUsesWildcardDefault(Alternative alt) {
        AlternativeElement head = alt.head;
        // if element is #(. blah) then check to see if el is root
        if (head instanceof TreeElement &&
            ((TreeElement)head).root instanceof WildcardElement) {
            return true;
        }
        if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
            return true;
        }
        return false;
    }

    /** Is this block of alternatives LL(k)?  Fill in alternative cache for this block.
     *  @return true if the block is deterministic
     */
    public boolean deterministic(AlternativeBlock blk) {
        /** The lookahead depth for this decision */
        int k = 1; // start at k=1
        if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
        boolean det = true;
        int nalts = blk.alternatives.size();
        AlternativeBlock saveCurrentBlock = currentBlock;
        Alternative wildcardAlt = null;
        currentBlock = blk;

        /* don't allow nongreedy (...) blocks */
        if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
            tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
        }

        // SPECIAL CASE: only one alternative.  We don't need to check the
        // determinism, but other code expects the lookahead cache to be
        // set for the single alt.
        if (nalts == 1) {
            AlternativeElement e = blk.getAlternativeAt(0).head;
            currentBlock.alti = 0;
            blk.getAlternativeAt(0).cache[1] = e.look(1);
            blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
            currentBlock = saveCurrentBlock;
            return true; // always deterministic for one alt
        }

        // outer:
        for (int i = 0; i < nalts - 1; i++) {
            currentBlock.alti = i;
            currentBlock.analysisAlt = i; // which alt are we analyzing?
            currentBlock.altj = i + 1;    // reset this alt.  Haven't computed yet,
            // but we need the alt number.
            // inner:
            // compare against other alternatives with lookahead depth k
            for (int j = i + 1; j < nalts; j++) {
                currentBlock.altj = j;
                if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
                currentBlock.analysisAlt = j; // which alt are we analyzing?
                k = 1; // always attempt minimum lookahead possible.

                // check to see if there is a lookahead depth that distinguishes
                // between alternatives i and j.
                Lookahead[] r = new Lookahead[grammar.maxk + 1];
                boolean haveAmbiguity;
                do {
                    haveAmbiguity = false;
                    if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
                    Lookahead p, q;
                    p = getAltLookahead(blk, i, k);
                    q = getAltLookahead(blk, j, k);

                    // compare LOOK(alt i) with LOOK(alt j).  Is there an intersection?
                    // Lookahead must be disjoint.
                    if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
                    if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
                    // r[i] = p.fset.and(q.fset);
                    r[k] = p.intersection(q);
                    if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
                    if (!r[k].nil()) {
                        haveAmbiguity = true;
                        k++;
                    }
                    // go until no more lookahead to use or no intersection
                } while (haveAmbiguity && k <= grammar.maxk);

                Alternative ai = blk.getAlternativeAt(i);
                Alternative aj = blk.getAlternativeAt(j);
                if (haveAmbiguity) {
                    det = false;
                    ai.lookaheadDepth = NONDETERMINISTIC;
                    aj.lookaheadDepth = NONDETERMINISTIC;

                    /* if ith alt starts with a syntactic predicate, computing the
                     * lookahead is still done for code generation, but messages
                     * should not be generated when comparing against alt j.
                     * Alternatives with syn preds that are unnecessary do
                     * not result in syn pred try-blocks.
                     */
                    if (ai.synPred != null) {
                        if (DEBUG_ANALYZER) {
                            System.out.println("alt " + i + " has a syn pred");
                        }
                        // The alt with the (...)=> block is nondeterministic for sure.
                        // If the (...)=> conflicts with alt j, j is nondeterministic.
                        // This prevents alt j from being in any switch statements.
                        // move on to next alternative=>no possible ambiguity!
                        // continue inner;
                    }

                    /* if ith alt starts with a semantic predicate, computing the
                     * lookahead is still done for code generation, but messages
                     * should not be generated when comparing against alt j.
                     */
                    else if (ai.semPred != null) {
                        if (DEBUG_ANALYZER) {
                            System.out.println("alt " + i + " has a sem pred");
                        }
                    }

                    /* if jth alt is exactly the wildcard or wildcard root of tree,
                     * then remove elements from alt i lookahead from alt j's lookahead.
                     * Don't do an ambiguity warning.
                     */
                    else if (altUsesWildcardDefault(aj)) {
                        // System.out.println("removing pred sets");
                        // removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
                        wildcardAlt = aj;
                    }

                    /* If the user specified warnWhenFollowAmbig=false, then we
                     * can turn off this warning IFF one of the alts is empty;
                     * that is, it points immediately at the end block.
                     */
                    else if (!blk.warnWhenFollowAmbig &&
                             (ai.head instanceof BlockEndElement ||
                              aj.head instanceof BlockEndElement)) {
                        // System.out.println("ai.head pts to "+ai.head.getClass());
                        // System.out.println("aj.head pts to "+aj.head.getClass());
                    }

                    /* If they have the generateAmbigWarnings option off for the block
                     * then don't generate a warning.
                     */
                    else if (!blk.generateAmbigWarnings) {
                    }

                    /* If greedy=true and *one* empty alt shut off warning. */
                    else if (blk.greedySet && blk.greedy &&
                             ((ai.head instanceof BlockEndElement &&
                               !(aj.head instanceof BlockEndElement)) ||
                              (aj.head instanceof BlockEndElement &&
                               !(ai.head instanceof BlockEndElement)))) {
                        // System.out.println("greedy set to true; one alt empty");
                    }

                    /* We have no choice but to report a nondeterminism */
                    else {
                        tool.errorHandler.warnAltAmbiguity(
                            grammar,
                            blk,              // the block
                            lexicalAnalysis,  // true if lexical
                            grammar.maxk,     // depth of ambiguity
                            r,                // set of linear ambiguities
                            i,                // first ambiguous alternative
                            j                 // second ambiguous alternative
                        );
                    }
                }
                else {
                    // a lookahead depth, k, was found where i and j do not conflict
                    ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
                    aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
                }
            }
        }

        // finished with block.

        // If had wildcard default clause idiom, remove competing lookahead
        /*
        if ( wildcardAlt!=null ) {
            removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
        }
        */

        currentBlock = saveCurrentBlock;
        return det;
    }
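
    // Illustrative note (not in the original ANTLR source): for a decision such as
    // ( A B | A C ), the depth-1 sets of both alternatives contain A, so the
    // intersection is non-empty and k is bumped to 2; the depth-2 sets {B} and {C}
    // are disjoint, so both alternatives end up with lookaheadDepth == 2.  Because
    // the analysis is linear-approximate, a decision like ( A X | B Y ) versus
    // ( A Y | B X ) collapses to the same per-depth sets {A,B} and {X,Y} for both
    // alternatives and is reported as ambiguous even though full LL(2) could
    // distinguish the actual token sequences.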

    /** Is (...)+ block LL(1)?  Fill in alternative cache for this block.
     *  @return true if the block is deterministic
     */
    public boolean deterministic(OneOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        boolean blkOk = deterministic((AlternativeBlock)blk);
        // block has been checked, now check that what follows does not conflict
        // with the lookahead of the (...)+ block.
        boolean det = deterministicImpliedPath(blk);
        currentBlock = saveCurrentBlock;
        return det && blkOk;
    }

    /** Is (...)* block LL(1)?  Fill in alternative cache for this block.
     *  @return true if the block is deterministic
     */
    public boolean deterministic(ZeroOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        boolean blkOk = deterministic((AlternativeBlock)blk);
        // block has been checked, now check that what follows does not conflict
        // with the lookahead of the (...)* block.
        boolean det = deterministicImpliedPath(blk);
        currentBlock = saveCurrentBlock;
        return det && blkOk;
    }

    /** Is this (...)* or (...)+ block LL(k)?
     *  @return true if the block is deterministic
     */
    public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
        /** The lookahead depth for this decision considering implied exit path */
        int k;
        boolean det = true;
        Vector alts = blk.getAlternatives();
        int nalts = alts.size();
        currentBlock.altj = -1; // comparing against implicit optional/exit alt

        if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
        for (int i = 0; i < nalts; i++) { // check follow against all alts
            Alternative alt = blk.getAlternativeAt(i);

            if (alt.head instanceof BlockEndElement) {
                tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
            }

            k = 1; // assume each alt is LL(1) with exit branch
            // check to see if there is a lookahead depth that distinguishes
            // between alternative i and the exit branch.
            Lookahead[] r = new Lookahead[grammar.maxk + 1];
            boolean haveAmbiguity;
            do {
                haveAmbiguity = false;
                if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
                Lookahead p;
                Lookahead follow = blk.next.look(k);
                blk.exitCache[k] = follow;
                currentBlock.alti = i;
                p = getAltLookahead(blk, i, k);

                if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
                if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
                //r[k] = follow.fset.and(p.fset);
                r[k] = follow.intersection(p);
                if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
                if (!r[k].nil()) {
                    haveAmbiguity = true;
                    k++;
                }
                // go until no more lookahead to use or no intersection
            } while (haveAmbiguity && k <= grammar.maxk);

            if (haveAmbiguity) {
                det = false;
                alt.lookaheadDepth = NONDETERMINISTIC;
                blk.exitLookaheadDepth = NONDETERMINISTIC;
                Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);

                /* If the user specified warnWhenFollowAmbig=false, then we
                 * can turn off this warning.
                 */
                if (!blk.warnWhenFollowAmbig) {
                }

                /* If they have the generateAmbigWarnings option off for the block
                 * then don't generate a warning.
                 */
                else if (!blk.generateAmbigWarnings) {
                }

                /* If greedy=true and alt not empty, shut off warning */
                else if (blk.greedy == true && blk.greedySet &&
                         !(ambigAlt.head instanceof BlockEndElement)) {
                    if (DEBUG_ANALYZER) System.out.println("greedy loop");
                }

                /* If greedy=false then shut off warning...will have
                 * to add "if FOLLOW break"
                 * block during code gen to compensate for removal of warning.
                 */
                else if (blk.greedy == false &&
                         !(ambigAlt.head instanceof BlockEndElement)) {
                    if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
                    // if FOLLOW not single k-string (|set[k]| can
                    // be > 1 actually) then must warn them that
                    // loop may terminate incorrectly.
                    // For example, ('a'..'d')+ ("ad"|"cb")
                    if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
                        tool.warning(new String[]{
                            "nongreedy block may exit incorrectly due",
                            "\tto limitations of linear approximate lookahead (first k-1 sets",
                            "\tin lookahead not singleton)."},
                            grammar.getFilename(), blk.getLine(), blk.getColumn());
                    }
                }

                // no choice but to generate a warning
                else {
                    tool.errorHandler.warnAltExitAmbiguity(
                        grammar,
                        blk,              // the block
                        lexicalAnalysis,  // true if lexical
                        grammar.maxk,     // depth of ambiguity
                        r,                // set of linear ambiguities
                        i                 // ambiguous alternative
                    );
                }
            }
            else {
                alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
                blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
            }
        }
        return det;
    }

    /** Compute the lookahead set of whatever follows references to
     *  the rule associated with the FOLLOW block.
     */
    public Lookahead FOLLOW(int k, RuleEndElement end) {
        // what rule are we trying to compute FOLLOW of?
        RuleBlock rb = (RuleBlock)end.block;
        // rule name is different in lexer
        String rule;
        if (lexicalAnalysis) {
            rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
        }
        else {
            rule = rb.getRuleName();
        }

        if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");

        // are we in the midst of computing this FOLLOW already?
        if (end.lock[k]) {
            if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
            return new Lookahead(rule);
        }

        // Check to see if there is a cached value
        if (end.cache[k] != null) {
            if (DEBUG_ANALYZER) {
                System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
            }
            // if the cache is a complete computation then simply return entry
            if (end.cache[k].cycle == null) {
                return (Lookahead)end.cache[k].clone();
            }
            // A cache entry exists, but it is a reference to a cyclic computation.
            RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
            RuleEndElement re = rs.getBlock().endNode;
            // The other entry may not exist because it is still being
            // computed when this cycle cache entry was found here.
            if (re.cache[k] == null) {
                // return the cycle...that's all we can do at the moment.
                return (Lookahead)end.cache[k].clone();
            }
            else {
                if (DEBUG_ANALYZER) {
                    System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from " + end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for " + ((RuleBlock)re.block).getRuleName() + ": " + re.cache[k].toString(",", charFormatter, grammar));
                }
                // combine results from other rule's FOLLOW
                if (re.cache[k].cycle == null) {
                    // current rule depends on another rule's FOLLOW and
                    // it is complete with no cycle; just kill our cycle and
                    // combine full result from other rule's FOLLOW
                    end.cache[k].combineWith(re.cache[k]);
                    end.cache[k].cycle = null; // kill cycle as we're complete
                }
                else {
                    // the FOLLOW cache for other rule has a cycle also.
                    // Here is where we bubble up a cycle.  We better recursively
                    // wipe out cycles (partial computations).  I'm a little nervous
                    // that we might leave a cycle here, however.
                    Lookahead refFOLLOW = FOLLOW(k, re);
                    end.cache[k].combineWith(refFOLLOW);
                    // all cycles should be gone, but if not, record ref to cycle
                    end.cache[k].cycle = refFOLLOW.cycle;
                }
                if (DEBUG_ANALYZER) {
                    System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from " + end.cache[k].toString(",", charFormatter, grammar));
                }
                // Return the updated cache entry associated
                // with the cycle reference.
                return (Lookahead)end.cache[k].clone();
            }
        }

        end.lock[k] = true; // prevent FOLLOW computation cycles

        Lookahead p = new Lookahead();

        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);

        // Walk list of references to this rule to compute FOLLOW
        for (int i = 0; i < rs.numReferences(); i++) {
            RuleRefElement rr = rs.getReference(i);
            if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
            Lookahead q = rr.next.look(k);
            if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
            /* If there is a cycle then if the cycle is to the rule for
             * this end block, you have a cycle to yourself.  Remove the
             * cycle indication--the lookahead is complete.
             */
            if (q.cycle != null && q.cycle.equals(rule)) {
                q.cycle = null; // don't want cycle to yourself!
            }
            // add the lookahead into the current FOLLOW computation set
            p.combineWith(q);
            if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
        }

        end.lock[k] = false; // we're not doing FOLLOW anymore

        // if no rules follow this, it can be a start symbol or called by a start sym.
        // set the follow to be end of file.
        if (p.fset.nil() && p.cycle == null) {
            if (grammar instanceof TreeWalkerGrammar) {
                // Tree grammars don't see EOF, they see end of sibling list or
                // "NULL TREE LOOKAHEAD".
                p.fset.add(Token.NULL_TREE_LOOKAHEAD);
            }
            else if (grammar instanceof LexerGrammar) {
                // Lexical grammars use Epsilon to indicate that the end of rule has been hit.
                // EOF would be misleading; any character can follow a token rule, not just EOF
                // as in a grammar (where a start symbol is followed by EOF).  There is no
                // sequence info in a lexer between tokens to indicate what is the last token
                // to be seen.
                // p.fset.add(EPSILON_TYPE);
                p.setEpsilon();
            }
            else {
                p.fset.add(Token.EOF_TYPE);
            }
        }

        // Cache the result of the FOLLOW computation
        if (DEBUG_ANALYZER) {
            System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
        }
        end.cache[k] = (Lookahead)p.clone();

        return p;
    }
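
    // Illustrative note (not in the original ANTLR source): for a grammar fragment
    // such as  a : b A ;  b : B | ;  c : b C ;  the references to b are followed by
    // A and C, so FOLLOW(1) at the end of b is {A, C}.  If two rules reference each
    // other, the lock[] flag makes the inner computation return a Lookahead that
    // only names the rule it cycled back to; that partial result is cached, and the
    // cycle is resolved (or bubbled up) the next time the cached entry is consulted.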

    private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
        Lookahead p;
        Alternative a = blk.getAlternativeAt(alt);
        AlternativeElement e = a.head;
        //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
        if (a.cache[k] == null) {
            p = e.look(k);
            a.cache[k] = p;
        }
        else {
            p = a.cache[k];
        }
        return p;
    }

    /** Actions are ignored */
    public Lookahead look(int k, ActionElement action) {
        if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")");
        return action.next.look(k);
    }

    /** Combine the lookahead computed for each alternative */
    public Lookahead look(int k, AlternativeBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
        AlternativeBlock saveCurrentBlock = currentBlock;
        currentBlock = blk;
        Lookahead p = new Lookahead();
        for (int i = 0; i < blk.alternatives.size(); i++) {
            if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
            // must set analysis alt
            currentBlock.analysisAlt = i;
            Alternative alt = blk.getAlternativeAt(i);
            AlternativeElement elem = alt.head;
            if (DEBUG_ANALYZER) {
                if (alt.head == alt.tail) {
                    System.out.println("alt " + i + " is empty");
                }
            }
            Lookahead q = elem.look(k);
            p.combineWith(q);
        }
        if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
            // Invert the lookahead set
            if (lexicalAnalysis) {
                BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
                int[] elems = p.fset.toArray();
                for (int j = 0; j < elems.length; j++) {
                    b.remove(elems[j]);
                }
                p.fset = b;
            }
            else {
                p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
            }
        }
        currentBlock = saveCurrentBlock;
        return p;
    }

    /** Compute what follows this place-holder node and possibly
     *  what begins the associated loop unless the
     *  node is locked.
     *  <p>
     *  if we hit the end of a loop, we have to include
     *  what tokens can begin the loop as well.  If the start
     *  node is locked, then we simply found an empty path
     *  through this subrule while analyzing it.  If the
     *  start node is not locked, then this node was hit
     *  during a FOLLOW operation and the FIRST of this
     *  block must be included in that lookahead computation.
     */
    public Lookahead look(int k, BlockEndElement end) {
        if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
        if (end.lock[k]) {
            // computation in progress => the tokens we would have
            // computed (had we not been locked) will be included
            // in the set by that computation with the lock on this
            // node.
            return new Lookahead();
        }

        Lookahead p;

        /* Hitting the end of a loop means you can see what begins the loop */
        if (end.block instanceof ZeroOrMoreBlock ||
            end.block instanceof OneOrMoreBlock) {
            // compute what can start the block,
            // but lock end node so we don't do it twice in same
            // computation.
            end.lock[k] = true;
            p = look(k, end.block);
            end.lock[k] = false;
        }
        else {
            p = new Lookahead();
        }

        /* Tree blocks do not have any follow because they are children
         * of what surrounds them.  For example, A #(B C) D results in
         * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
         * indicates that nothing can follow the last node of tree #(B C)
         */
        if (end.block instanceof TreeElement) {
            p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
        }

        /* Syntactic predicates such as ( (A)? )=> have no follow per se.
         * We cannot accurately say what would be matched following a
         * syntactic predicate (you MIGHT be ok if you said it was whatever
         * followed the alternative predicted by the predicate).  Hence,
         * (like end-of-token) we return Epsilon to indicate "unknown
         * lookahead."
         */
        else if (end.block instanceof SynPredBlock) {
            p.setEpsilon();
        }

        // compute what can follow the block
        else {
            Lookahead q = end.block.next.look(k);
            p.combineWith(q);
        }

        return p;
    }

    /** Return this char as the lookahead if k=1.
     *  <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
     *  <p>
     *  If the atom has the <tt>not</tt> flag on, then
     *  create the set complement of the tokenType
     *  which is the set of all characters referenced
     *  in the grammar with this char turned off.
     *  Also remove characters from the set that
     *  are currently allocated for predicting
     *  previous alternatives.  This avoids ambiguity
     *  messages and is more properly what is meant.
     *  ( 'a' | ~'a' ) implies that the ~'a' is the
     *  "else" clause.
     *  <p>
     *  NOTE: we do <b>NOT</b> include the exit path in
     *  the exclusion set.  E.g.,
     *  ( 'a' | ~'a' )* 'b'
     *  should exit upon seeing a 'b' during the loop.
     */
    public Lookahead look(int k, CharLiteralElement atom) {
        if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return atom.next.look(k - 1);
        }
        if (lexicalAnalysis) {
            if (atom.not) {
                BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
                if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString());
                // remove stuff predicted by preceding alts and follow of block
                removeCompetingPredictionSets(b, atom);
                if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
                // now remove element that is stated not to be in the set
                b.clear(atom.getType());
                return new Lookahead(b);
            }
            else {
                return Lookahead.of(atom.getType());
            }
        }
        else {
            // Should have been avoided by MakeGrammar
            tool.panic("Character literal reference found in parser");
            // ... so we make the compiler happy
            return Lookahead.of(atom.getType());
        }
    }

    public Lookahead look(int k, CharRangeElement r) {
        if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return r.next.look(k - 1);
        }
        BitSet p = BitSet.of(r.begin);
        for (int i = r.begin + 1; i <= r.end; i++) {
            p.add(i);
        }
        return new Lookahead(p);
    }

    public Lookahead look(int k, GrammarAtom atom) {
        if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");

        if (lexicalAnalysis) {
            // MakeGrammar should have created a rule reference instead
            tool.panic("token reference found in lexer");
        }
        // Skip until analysis hits k==1
        if (k > 1) {
            return atom.next.look(k - 1);
        }
        Lookahead l = Lookahead.of(atom.getType());
        if (atom.not) {
            // Invert the lookahead set against the token vocabulary
            int maxToken = grammar.tokenManager.maxTokenType();
            l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            // remove stuff predicted by preceding alts and follow of block
            removeCompetingPredictionSets(l.fset, atom);
        }
        return l;
    }

    /** The lookahead of a (...)+ block is the combined lookahead of
     *  all alternatives and, if an empty path is found, the lookahead
     *  of what follows the block.
     */
    public Lookahead look(int k, OneOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        return p;
    }

    /** Combine the lookahead computed for each alternative.
     *  Lock the node so that no other computation may come back
     *  on itself--infinite loop.  This also implies infinite left-recursion
     *  in the grammar (or an error in this algorithm ;)).
     */
    public Lookahead look(int k, RuleBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        return p;
    }

    /** If not locked or noFOLLOW set, compute FOLLOW of a rule.
     *  <p>
     *  TJP says 8/12/99: not true anymore:
     *  Lexical rules never compute follow.  They set epsilon and
     *  the code generator gens code to check for any character.
     *  The code generator must remove the tokens used to predict
     *  any previous alts in the same block.
     *  <p>
     *  When the last node of a rule is reached and noFOLLOW,
     *  it implies that a "local" FOLLOW will be computed
     *  after this call.  I.e.,
     *  <pre>
     *      a : b A;
     *      b : B | ;
     *      c : b C;
     *  </pre>
     *  Here, when computing the look of rule b from rule a,
     *  we want only {B,EPSILON_TYPE} so that look(b A) will
     *  be {B,A} not {B,A,C}.
     *  <p>
     *  if the end block is not locked and the FOLLOW is
     *  wanted, the algorithm must compute the lookahead
     *  of what follows references to this rule.  If
     *  end block is locked, FOLLOW will return an empty set
     *  with a cycle to the rule associated with this end block.
     */
    public Lookahead look(int k, RuleEndElement end) {
        if (DEBUG_ANALYZER)
            System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
                               end.noFOLLOW + "; lock is " + end.lock[k]);
        if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
            Lookahead p = new Lookahead();
            p.setEpsilon();
            p.epsilonDepth = BitSet.of(k);
            return p;
        }
        Lookahead p = FOLLOW(k, end);
        return p;
    }

    /** Compute the lookahead contributed by a rule reference.
     *
     *  <p>
     *  When computing ruleref lookahead, we don't want the FOLLOW
     *  computation done if an empty path exists for the rule.
     *  The FOLLOW is too loose of a set...we want only to
     *  include the "local" FOLLOW or what can follow this
     *  particular ref to the node.  In other words, we use
     *  context information to reduce the complexity of the
     *  analysis and strengthen the parser.
     *
     *  The noFOLLOW flag is used as a means of restricting
     *  the FOLLOW to a "local" FOLLOW.  This variable is
     *  orthogonal to the <tt>lock</tt> variable that prevents
     *  infinite recursion.  noFOLLOW does not care about what k is.
     */
    public Lookahead look(int k, RuleRefElement rr) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
        if (rs == null || !rs.defined) {
            tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
            return new Lookahead();
        }
        RuleBlock rb = rs.getBlock();
        RuleEndElement end = rb.endNode;
        boolean saveEnd = end.noFOLLOW;
        end.noFOLLOW = true;
        // go off to the rule and get the lookahead (w/o FOLLOW)
        Lookahead p = look(k, rr.targetRule);
        if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
        // restore state of end block
        end.noFOLLOW = saveEnd;

        // check for infinite recursion.  If a cycle is returned: trouble!
        if (p.cycle != null) {
            tool.error("infinite recursion to rule " + p.cycle + " from rule " +
                       rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
        }

        // is the local FOLLOW required?
        if (p.containsEpsilon()) {
            if (DEBUG_ANALYZER)
                System.out.println("rule ref to " +
                                   rr.targetRule + " has eps, depth: " + p.epsilonDepth);

            // remove epsilon
            p.resetEpsilon();
            // fset.clear(EPSILON_TYPE);

            // for each lookahead depth that saw epsilon
            int[] depths = p.epsilonDepth.toArray();
            p.epsilonDepth = null; // clear all epsilon stuff
            for (int i = 0; i < depths.length; i++) {
                int rk = k - (k - depths[i]);
                Lookahead q = rr.next.look(rk); // see comments in Lookahead
                p.combineWith(q);
            }
            // note: any of these look() computations for local follow can
            // set EPSILON in the set again if the end of this rule is found.
        }

        return p;
    }

    public Lookahead look(int k, StringLiteralElement atom) {
        if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")");
        if (lexicalAnalysis) {
            // need more lookahead than string can provide?
            if (k > atom.processedAtomText.length()) {
                return atom.next.look(k - atom.processedAtomText.length());
            }
            else {
                // get char at lookahead depth k, from the processed literal text
                return Lookahead.of(atom.processedAtomText.charAt(k - 1));
            }
        }
        else {
            // Skip until analysis hits k==1
            if (k > 1) {
                return atom.next.look(k - 1);
            }
            Lookahead l = Lookahead.of(atom.getType());
            if (atom.not) {
                // Invert the lookahead set against the token vocabulary
                int maxToken = grammar.tokenManager.maxTokenType();
                l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            }
            return l;
        }
    }

    /** The lookahead of a (...)=> block is the lookahead of
     *  what follows the block.  By definition, the syntactic
     *  predicate block defies static analysis (you want to try it
     *  out at run-time).  The LOOK of (a)=>A B is A for LL(1)
     *  ### is this even called?
     */
    public Lookahead look(int k, SynPredBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")");
        return blk.next.look(k);
    }

    public Lookahead look(int k, TokenRangeElement r) {
        if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")");
        // Skip until analysis hits k==1
        if (k > 1) {
            return r.next.look(k - 1);
        }
        BitSet p = BitSet.of(r.begin);
        for (int i = r.begin + 1; i <= r.end; i++) {
            p.add(i);
        }
        return new Lookahead(p);
    }

    public Lookahead look(int k, TreeElement t) {
        if (DEBUG_ANALYZER)
            System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
        if (k > 1) {
            return t.next.look(k - 1);
        }
        Lookahead l = null;
        if (t.root instanceof WildcardElement) {
            l = t.root.look(1); // compute FIRST set minus previous rows
        }
        else {
            l = Lookahead.of(t.root.getType());
            if (t.root.not) {
                // Invert the lookahead set against the token vocabulary
                int maxToken = grammar.tokenManager.maxTokenType();
                l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
            }
        }
        return l;
    }

    public Lookahead look(int k, WildcardElement wc) {
        if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")");

        // Skip until analysis hits k==1
        if (k > 1) {
            return wc.next.look(k - 1);
        }

        BitSet b;
        if (lexicalAnalysis) {
            // Copy the character vocabulary
            b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
        }
        else {
            b = new BitSet(1);
            // Invert the lookahead set against the token vocabulary
            int maxToken = grammar.tokenManager.maxTokenType();
            b.notInPlace(Token.MIN_USER_TYPE, maxToken);
            if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b);
        }

        // Remove prediction sets from competing alternatives
        // removeCompetingPredictionSets(b, wc);

        return new Lookahead(b);
    }

    /** The (...)* element is the combined lookahead of the alternatives and what can
     *  follow the loop.
     */
    public Lookahead look(int k, ZeroOrMoreBlock blk) {
        if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")");
        Lookahead p = look(k, (AlternativeBlock)blk);
        Lookahead q = blk.next.look(k);
        p.combineWith(q);
        return p;
    }

    /** Compute the combined lookahead for all productions of a rule.
     *  If the lookahead returns with epsilon, at least one epsilon
     *  path exists (one that consumes no tokens).  The noFOLLOW
     *  flag being set for this end-of-rule block indicates that
     *  a rule ref invoked this rule.
     *
     *  Currently only look(RuleRef) calls this.  There is no need
     *  for the code generator to call this.
     */
    public Lookahead look(int k, String rule) {
        if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")");
        RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
        RuleBlock rb = rs.getBlock();

        if (rb.lock[k]) {
            if (DEBUG_ANALYZER)
                System.out.println("infinite recursion to rule " + rb.getRuleName());
            return new Lookahead(rule);
        }

        // have we computed it before?
        if (rb.cache[k] != null) {
            if (DEBUG_ANALYZER) {
                System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
                                   rb.cache[k].toString(",", charFormatter, grammar));
            }
            return (Lookahead)rb.cache[k].clone();
        }

        rb.lock[k] = true;
        Lookahead p = look(k, (RuleBlock)rb);
        rb.lock[k] = false;

        // cache results
        rb.cache[k] = (Lookahead)p.clone();
        if (DEBUG_ANALYZER) {
            System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
                               rb.cache[k].toString(",", charFormatter, grammar));
        }
        return p;
    }

    /** If the first k-1 sets are singleton sets, the approximate
     *  lookahead analysis is equivalent to full lookahead analysis.
     */
    public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
        // first k-1 sets degree 1?
        for (int i = 1; i <= k - 1; i++) {
            BitSet look = bset[i].fset;
            if (look.degree() > 1) {
                return false;
            }
        }
        return true;
    }
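
    // Illustrative note (not in the original ANTLR source): this check backs the
    // nongreedy-loop warning issued in deterministicImpliedPath().  For the exit
    // lookahead of ('a'..'d')+ ("ad"|"cb"), the depth-1 exit set is {'a','c'}
    // (degree 2), so the first k-1 sets are not singletons and the linear
    // approximation cannot be trusted to match full LL(k); a warning is issued.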

    /** Remove the prediction sets from preceding alternatives
     *  and follow set, but *only* if this element is the first element
     *  of the alternative.  The class members currentBlock and
     *  currentBlock.analysisAlt must be set correctly.
     *  @param b The prediction bitset to be modified
     *  @param el The element of interest
     */
    private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
        // Only do this if the element is the first element of the alt,
        // because we are making an implicit assumption that k==1.
        GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
        // if element is #(. blah) then check to see if el is root
        if (head instanceof TreeElement) {
            if (((TreeElement)head).root != el) {
                return;
            }
        }
        else if (el != head) {
            return;
        }
        for (int i = 0; i < currentBlock.analysisAlt; i++) {
            AlternativeElement e = currentBlock.getAlternativeAt(i).head;
            b.subtractInPlace(e.look(1).fset);
        }
    }

    /** Remove the prediction sets from preceding alternatives.
     *  The class member currentBlock must be set correctly.
     *  Remove prediction sets from 1..k.
     *  @param look The prediction lookahead to be modified
     *  @param el The element of interest
     *  @param k How deep into the lookahead to modify
     */
    private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
        for (int d = 1; d <= k; d++) {
            for (int i = 0; i < currentBlock.analysisAlt; i++) {
                AlternativeElement e = currentBlock.getAlternativeAt(i).head;
                look[d].fset.subtractInPlace(e.look(d).fset);
            }
        }
    }

    /** reset the analyzer so it looks like a new one */
    private void reset() {
        grammar = null;
        DEBUG_ANALYZER = false;
        currentBlock = null;
        lexicalAnalysis = false;
    }

    /** Set the grammar for the analyzer */
    public void setGrammar(Grammar g) {
        if (grammar != null) {
            reset();
        }
        grammar = g;

        // Is this lexical?
        lexicalAnalysis = (grammar instanceof LexerGrammar);
        DEBUG_ANALYZER = grammar.analyzerDebug;
    }

    public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) {
        if (
            blk instanceof ZeroOrMoreBlock ||
            blk instanceof OneOrMoreBlock ||
            blk instanceof SynPredBlock
        ) {
            return false;
        }
        // Cannot invert an empty subrule
        if (blk.alternatives.size() == 0) {
            return false;
        }
        // The block must only contain alternatives with a single element,
        // where each element is a char, token, char range, or token range.
        for (int i = 0; i < blk.alternatives.size(); i++) {
            Alternative alt = blk.getAlternativeAt(i);
            // Cannot have anything interesting in the alternative ...
            if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
                return false;
            }
            // ... and there must be one simple element
            AlternativeElement elt = alt.head;
            if (
                !(
                    elt instanceof CharLiteralElement ||
                    elt instanceof TokenRefElement ||
                    elt instanceof CharRangeElement ||
                    elt instanceof TokenRangeElement ||
                    (elt instanceof StringLiteralElement && !forLexer)
                ) ||
                !(elt.next instanceof BlockEndElement) ||
                elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
            ) {
                return false;
            }
        }
        return true;
    }
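
    // Illustrative note (not in the original ANTLR source): an inverted subrule such
    // as ~('a'|'b'|'0'..'9') qualifies because every alternative is a single char,
    // token, or range atom with nothing else in the alternative, so look() can simply
    // complement the combined set against the vocabulary.  Blocks containing loops,
    // predicates, or multi-element alternatives return false here and are never
    // complemented as a whole.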
}