package com.fr.third.antlr; /* ANTLR Translator Generator * Project led by Terence Parr at http://www.cs.usfca.edu * Software rights: http://www.antlr.org/license.html */ import java.util.*; import com.fr.third.antlr.ASdebug.ASDebugStream; import com.fr.third.antlr.ASdebug.IASDebugStream; import com.fr.third.antlr.ASdebug.TokenOffsetInfo; import com.fr.third.antlr.collections.impl.BitSet; /** This token stream tracks the *entire* token stream coming from * a lexer, but does not pass on the whitespace (or whatever else * you want to discard) to the parser. * * This class can then be asked for the ith token in the input stream. * Useful for dumping out the input stream exactly after doing some * augmentation or other manipulations. Tokens are index from 0..n-1 * * You can insert stuff, replace, and delete chunks. Note that the * operations are done lazily--only if you convert the buffer to a * String. This is very efficient because you are not moving data around * all the time. As the buffer of tokens is converted to strings, the * toString() method(s) check to see if there is an operation at the * current index. If so, the operation is done and then normal String * rendering continues on the buffer. This is like having multiple Turing * machine instruction streams (programs) operating on a single input tape. :) * * Since the operations are done lazily at toString-time, operations do not * screw up the token index values. That is, an insert operation at token * index i does not change the index values for tokens i+1..n-1. * * Because operations never actually alter the buffer, you may always get * the original token stream back without undoing anything. Since * the instructions are queued up, you can easily simulate transactions and * roll back any changes if there is an error just by removing instructions. * For example, * * TokenStreamRewriteEngine rewriteEngine = * new TokenStreamRewriteEngine(lexer); * JavaRecognizer parser = new JavaRecognizer(rewriteEngine); * ... * rewriteEngine.insertAfter("pass1", t, "foobar");} * rewriteEngine.insertAfter("pass2", u, "start");} * System.out.println(rewriteEngine.toString("pass1")); * System.out.println(rewriteEngine.toString("pass2")); * * You can also have multiple "instruction streams" and get multiple * rewrites from a single pass over the input. Just name the instruction * streams and use that name again when printing the buffer. This could be * useful for generating a C file and also its header file--all from the * same buffer. * * If you don't use named rewrite streams, a "default" stream is used. * * Terence Parr, parrt at antlr.org * University of San Francisco * February 2004 */ public class TokenStreamRewriteEngine implements TokenStream, IASDebugStream { public static final int MIN_TOKEN_INDEX = 0; static class RewriteOperation { protected int index; protected String text; protected RewriteOperation(int index, String text) { this.index = index; this.text = text; } /** Execute the rewrite operation by possibly adding to the buffer. * Return the index of the next token to operate on. */ public int execute(StringBuffer buf) { return index; } public String toString() { String opName = getClass().getName(); int $index = opName.indexOf('$'); opName = opName.substring($index+1, opName.length()); return opName+"@"+index+'"'+text+'"'; } } static class InsertBeforeOp extends RewriteOperation { public InsertBeforeOp(int index, String text) { super(index,text); } public int execute(StringBuffer buf) { buf.append(text); return index; } } /** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp * instructions. */ static class ReplaceOp extends RewriteOperation { protected int lastIndex; public ReplaceOp(int from, int to, String text) { super(from,text); lastIndex = to; } public int execute(StringBuffer buf) { if ( text!=null ) { buf.append(text); } return lastIndex+1; } } static class DeleteOp extends ReplaceOp { public DeleteOp(int from, int to) { super(from, to, null); } } public static final String DEFAULT_PROGRAM_NAME = "default"; public static final int PROGRAM_INIT_SIZE = 100; /** Track the incoming list of tokens */ protected List tokens; /** You may have multiple, named streams of rewrite operations. * I'm calling these things "programs." * Maps String (name) -> rewrite (List) */ protected Map programs = null; /** Map String (program name) -> Integer index */ protected Map lastRewriteTokenIndexes = null; /** track index of tokens */ protected int index = MIN_TOKEN_INDEX; /** Who do we suck tokens from? */ protected TokenStream stream; /** Which (whitespace) token(s) to throw out */ protected BitSet discardMask = new BitSet(); public TokenStreamRewriteEngine(TokenStream upstream) { this(upstream,1000); } public TokenStreamRewriteEngine(TokenStream upstream, int initialSize) { stream = upstream; tokens = new ArrayList(initialSize); programs = new HashMap(); programs.put(DEFAULT_PROGRAM_NAME, new ArrayList(PROGRAM_INIT_SIZE)); lastRewriteTokenIndexes = new HashMap(); } public Token nextToken() throws TokenStreamException { TokenWithIndex t; // suck tokens until end of stream or we find a non-discarded token do { t = (TokenWithIndex)stream.nextToken(); if ( t!=null ) { t.setIndex(index); // what is t's index in list? if ( t.getType()!=Token.EOF_TYPE ) { tokens.add(t); // track all tokens except EOF } index++; // move to next position } } while ( t!=null && discardMask.member(t.getType()) ); return t; } public void rollback(int instructionIndex) { rollback(DEFAULT_PROGRAM_NAME, instructionIndex); } /** Rollback the instruction stream for a program so that * the indicated instruction (via instructionIndex) is no * longer in the stream. UNTESTED! */ public void rollback(String programName, int instructionIndex) { List is = (List)programs.get(programName); if ( is!=null ) { programs.put(programName, is.subList(MIN_TOKEN_INDEX,instructionIndex)); } } public void deleteProgram() { deleteProgram(DEFAULT_PROGRAM_NAME); } /** Reset the program so that no instructions exist */ public void deleteProgram(String programName) { rollback(programName, MIN_TOKEN_INDEX); } /** If op.index > lastRewriteTokenIndexes, just add to the end. * Otherwise, do linear */ protected void addToSortedRewriteList(RewriteOperation op) { addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); } /** old; before moving v3 stuff in protected void addToSortedRewriteList(String programName, RewriteOperation op) { List rewrites = getProgram(programName); // if at or beyond last op's index, just append if ( op.index>=getLastRewriteTokenIndex(programName) ) { rewrites.add(op); // append to list of operations // record the index of this operation for next time through setLastRewriteTokenIndex(programName, op.index); return; } // not after the last one, so must insert to ordered list Comparator comparator = new Comparator() { public int compare(Object o, Object o1) { RewriteOperation a = (RewriteOperation)o; RewriteOperation b = (RewriteOperation)o1; if ( a.indexb.index ) return 1; return 0; } }; int pos = Collections.binarySearch(rewrites, op, comparator); if ( pos<0 ) { rewrites.add(-pos-1, op); } } */ /** Add an instruction to the rewrite instruction list ordered by * the instruction number (use a binary search for efficiency). * The list is ordered so that toString() can be done efficiently. * * When there are multiple instructions at the same index, the instructions * must be ordered to ensure proper behavior. For example, a delete at * index i must kill any replace operation at i. Insert-before operations * must come before any replace / delete instructions. If there are * multiple insert instructions for a single index, they are done in * reverse insertion order so that "insert foo" then "insert bar" yields * "foobar" in front rather than "barfoo". This is convenient because * I can insert new InsertOp instructions at the index returned by * the binary search. A ReplaceOp kills any previous replace op. Since * delete is the same as replace with null text, i can check for * ReplaceOp and cover DeleteOp at same time. :) */ protected void addToSortedRewriteList(String programName, RewriteOperation op) { List rewrites = getProgram(programName); //System.out.println("### add "+op+"; rewrites="+rewrites); Comparator comparator = new Comparator() { public int compare(Object o, Object o1) { RewriteOperation a = (RewriteOperation)o; RewriteOperation b = (RewriteOperation)o1; if ( a.indexb.index ) return 1; return 0; } }; int pos = Collections.binarySearch(rewrites, op, comparator); //System.out.println("bin search returns: pos="+pos); if ( pos>=0 ) { // binarySearch does not guarantee first element when multiple // are found. I must seach backwards for first op with op.index for (; pos>=0; pos--) { RewriteOperation prevOp = (RewriteOperation)rewrites.get(pos); if ( prevOp.index=MIN_TOKEN_INDEX && i<=end && i