001    /* Copyright 2000, 2001, Compaq Computer Corporation */
002    
003    package javafe.parser;
004    
005    import java.io.IOException;
006    import java.util.Hashtable;
007    
008    import javafe.ast.Identifier;
009    import javafe.ast.PrettyPrint;
010    import javafe.ast.LexicalPragma;
011    import javafe.ast.LexicalPragmaVec;
012    import javafe.ast._SpecialParserInterface;
013    
014    import javafe.util.Assert;
015    import javafe.util.ErrorSet;
016    import javafe.util.CorrelatedReader;
017    import javafe.util.Location;
018    
019    /**
020    
021     A <TT>Lex</TT> object generates a sequence of Java "input elements"
022     (that is, tokens) by converting the sequence of input characters and
023     line terminators generated by an underlying
024     <code>CorrelatedReader</code>.
025    
026     <p> The conversion of input characters occurs according to the lexical
027     rules in Chapter Three of the <cite>Java Language
028     Specification</cite>.  This specification describes three lexical
029     translation steps: the first two steps translate a raw Unicode input
030     stream into a "cooked" one in which Unicode escape characters from the
031     raw stream have been processed and in which line terminators have been
032     identified; the last step translates this cooked stream into a
033     sequence of Java "input elements" (comments, white space, identifiers,
034     tokens, literals, and punctuation).  <code>Lex</code> objects perform
035     the last of these translations steps; the first two are performed by
036     an underlying <code>CorrelatedReader</code> that is given to the
037     <code>Lex</code> object as the source of input characters.
038    
039     <p> Before a newly-created <code>Lex</code> object can be used, its
040     <code>restart</code> method must be called giving a
041     <code>CorrelatedReader</code> to scan.  At any point, a
042     <code>Lex</code> can be restarted on a different underlying reader.
043    
044     <p> The <TT>Lex</TT> class is thread safe, but instances of
045     <TT>Lex</TT> are not.  That is, two different threads may safely
046     access two different instances of <TT>Lex</TT> concurrently, but they
047     may not access the same instance of <TT>Lex</TT> concurrently.
048    
049    
050     <h3> Simple scanning </h3>
051    
052     <p> The <code>getNextToken</code> method of <code>Lex</code> objects
053     returns the translated token sequence, one token at a time.  It
054     discards white space, and it processes comments as described below.
055     In addition, <code>getNextToken</code> fills in the <code>Token</code>
056     fields of <code>this</code> (<code>Lex</code> is a subclass of
057     <code>Token</code>); for example, <code>ttype</code> gets an integer
058     code defining the type of token returned and <code>startingLoc</code>
059     gives the location of the first character making up the token.  If the
060     token is an identifier, <code>identifierVal</code> indicates which
061     one; if the token is a literal, <code>auxVal</code> gives its value.
062    
063     <p> <code>Lex</code> objects report errors by calling both the
064     <code>fatal</code> and <code>error</code> methods of
065     <code>ErrorSet</code>.  The fatal errors are unexpected characters,
066     unterminated comments and string and character literals, and IO errors
067     in the underlying input stream.  Recoverable errors are overflows in
068     literals (including overflows in octal escape sequences), non-octal
069     digits in integer literals, the string <code>0x</code> (interpreted as
070     a malformed integer literal), missing digits in a floating-point
071     exponent, bad escape sequences in character and string literals,
072     character literals containing no or multiple characters, and the
073     character literal <code>'''</code> (interpreted as <code>'\''</code>).
074    
075    
076     <h3> Lookahead </h3>
077    
078     <code>Lex</code> objects allow their clients to peek ahead into the
079     token stream by calling <code>lookahead</code>.  This method returns
080     the token code for the future token, but it does not affect the
081     <code>Token</code> fields of <code>this</code>.
082    
083     <p> If a call to <code>lookahead</code> needs to look past the set of
084     tokens already scanned, and those tokens have errors, then the errors
085     are reported immediately.
086    
087    
088     <h3> Extensibility: Keywords, punctuation </h3>
089    
090     <p> A keyword is a Java identifier with a special token code.
091     Ordinarily, identifiers are associated with the code
092     <code>TagConstants.IDENT</code>.  Keywords, while matching the lexical
093     grammar of identifiers, are associated with different codes.  In fact,
094     each keyword is typically associated with its own code.
095    
096     <p> The set of identifiers that a <code>Lex</code> object recognizes
097     as keywords is extensible.  A keyword is added to a <code>Lex</code>
098     object by calling the <code>addKeyword</code> method.  As a
099     convenience, a boolean given to the <code>Lex</code> constructor
100     indicates whether a newly-constructed <code>Lex</code> object should
101     automatically have all Java keywords added to it.
102    
103     <p> A punctuation string is a string of punctuation characters
104     recognized by a <code>Lex</code> object to be a token.  (Punctuation
105     characters are non-alphanumeric ASCII characters whose ASCII codes are
106     between 33 ('!')  and 126 ('~') inclusive.)  As with keywords, the set
107     of punctuation strings recognized by a <code>Lex</code> object is
108     extensible.  A punctuation string is added to a <code>Lex</code>
109     object by calling the <code>addPunctuation</code> method.  A boolean
110     given to the <code>Lex</code> constructor indicates whether a
111     newly-constructed <code>Lex</code> object should automatically have
112     all Java punctuation strings added to it.
113    
114    
115     <h3> Extensibility: comments and pragmas </h3>
116    
117     <p> The handling of comments is special in two ways: the punctuation
118     strings that start comments is extensible, the text of comments can be
119     parsed for pragmas.
120    
121     <h5> Comment recognition </h5>
122    
123     <p> Ordinarily, keywords and punctuation strings are mapped to token
124     codes that are returned by calls to <code>getNextToken</code>.
125     However, two token codes are treated specially:
126     <code>TagConstants.C_COMMENT</code> and <code>TagConstants.EOL_COMMENT</code>.
127     These codes are used to indicate the start of C-like comments
128     (<code>/*...*</code><code>/</code>) and end-of-line comments
129     (<code>//...</code>), respectively.  When a keyword or punctuation
130     string is mapped to these codes, it is handled like a comment
131     initiator rather than a regular token.
132    
133     <p> For all newly-created <code>Lex</code> objects, <code>/*</code> is
134     mapped to <code>TagConstants.C_COMMENT</code> and <code>//</code> is mapped
135     to <code>TagConstants.EOL_COMMENT</code>.  Other punctuation strings can be
136     made comment initiators by mapping them to comment-initiating codes.
137     This is more useful for <code>TagConstants.EOL_COMMENT</code> than for
138     <code>TagConstants.C_COMMENT</code>, since the string
139     <code>*</code><code>/</code> is hard-wired as the terminator of C-like
140     comments.
141    
142     <h5> Pragma parsing </h5>
143    
144     <code>Lex</code> objects are designed to support annotation of Java
145     programs through pragmas.  A <a href = "../pragma-handling.html">
146     separate document </a> describes our overall aproach to pragmas.  In
147     brief, our front-end supports two kinds of pragmas: control pragmas
148     that can appear anywhere in an input file and are collected in a list
149     apart from the parse tree, and syntax pragmas that can only appear in
150     certain grammatical contexts and become part of the parse tree.
151     Pragmas always appear in Java comments, at most one pragma per
152     comment.  These comments must have one of the following forms:
153    
154     <menu>
155     <li> <code>/*</code><i>tag</i> <i>white-space</i> <i>pragma-text</i><code>*</code><code>/</code>
156     <li> <code>//</code><i>tag</i> <i>white-space-minus-EOL</i> <i>pragma-text</i> <i>EOL</code>
157     </menu>
158    
159     <p> When a <code>Lex</code> object is created, it can optionally be
160     associated with a <code>PragmaParser</code> object.  If a
161     <code>Lex</code> object has no <code>PragmaParser</code>, it discards
162     all comments.  Otherwise, the <code>Lex</code> object passes the first
163     character of the comment (or -1 if the comment is empty) to the
164     <code>checkTag</code> method of the <code>PragmaParser</code> object,
165     which returns <code>false</code> if the comment definitely does not
166     contain any pragmas.  If the comment may contain pragmas, the
167     <code>Lex</code> object bundles the text between the delimiters of a
168     comment into a <code>CorrelatedReader</code> which it passes to the
169     <code>restart</code> method of its <code>PragmaParser</code>.  (This
170     text excludes both the opening <code>/*</code> or <code>//</code> and
171     the closing <code>*</code><code>/</code> or line terminator.)
172    
173     <p> The <code>Lex</code> object then calls <code>getNextPragma</code>
174     to read the pragmas out of the comment one at a time.  The
175     <code>Lex</code> object does this in a lazy manner; that is, it reads
176     a pragma, returns it to the parser, and waits until the parser calls
177     for the next token before it attempts to read another pragma.  The
178     <code>getNextPragma</code> method returns a boolean, returning
179     <code>false</code> if there are no more pragmas to be parsed.  The
180     <code>getNextPragma</code> method takes a <code>Token</code> as an
181     argument, storing information about the pragma parsed into this
182     argument.
183    
184     <p> When <code>PragmaParser.getNextPragma</code> returns a
185     <code>LexicalPragma</code>, the <code>Lex</code> object puts it in an
186     internal list rather than returning it to the parser.  The list of
187     collected lexical pragmas can be retrieved by calling
188     <code>getLexicalPragmas</code>.
189    
190     @see javafe.util.CorrelatedReader
191     @see javafe.parser.TagConstants
192     @see javafe.parser.Token
193     @see javafe.parser.PragmaParser
194    
195     */
196    
197    public class Lex extends Token
198    {
199        //// Underlying input stream
200    
201        /**
202         * Current state of input stream underlying the scanner minus the
203         * first character.  See <code>m_nextchr</code>. <p>
204         *
205         * This is null iff we are closed.
206         */
207    
208        /*@ spec_public @*/ protected CorrelatedReader m_in = null;
209    
210        /** Each call to <code>getNextToken</code> reads ahead one character
211         and leaves the result in <code>m_nextchr</code>.  In other words,
212         between calls to <code>getNextToken</code>, the stream of
213         characters yet-to-scanned consists of the character in
214         <code>m_nextchr</code> followed by the characters remaining in
215         <code>m_in</code>. */
216    
217        protected int m_nextchr;
218    
219    
220        //// Buffer used to hold the actual char's making up a token.
221    
222        // The exact characters making up a token are copied into this array
223        // for all tokens.  Although in some cases this isn't needed (e.g.,
224        // for string literals), we do it anyway in case someday we want to
225        // export that ability of getting a token's exact text rather than
226        // its interpretation, esp. in the case of literals.
227    
228        /** The characters that constitute the current token.  Only the
229         first <TT>textlen</TT> characters are part of the current token;
230         the actual length of <TT>text</TT> may be bigger.  The lexer may
231         occasionally need to resize <TT>text</TT>, so the same array might
232         not be used throughout the lifetime of the lexer.  */
233    
234        //@ invariant text != null;
235        //@ invariant text.length > 4;
236        protected char[] text = new char[64]; // Invariant: text.length > 4
237        // 64 should be large enough that overflows won't happen for most inputs
238    
239        /** The number of characters in the current token.  The "current
240         token" is the one parsed by the previous call to
241         <TT>getNextToken</TT> (there is no "current token" between
242         creation of a lexer and the first call to getNextToken).  */
243    
244        //@ invariant 0 <= textlen &&  textlen <= text.length;
245        protected int textlen = 0;
246    
247        /** Append 'c' to <CODE>text</CODE>, expanding if necessary. */
248        //@ modifies text, textlen;
249        //@ ensures textlen == \old(textlen)+1;
250        protected void append(int c) {
251            try {
252                text[textlen] = (char)c;            //@ nowarn IndexTooBig;  // caught
253            } catch (ArrayIndexOutOfBoundsException e) {
254                //@ assume textlen>=text.length;
255                char[] newtext = new char[textlen + 128];
256                System.arraycopy(text, 0, newtext, 0, textlen);
257                text = newtext;
258                text[textlen] = (char)c;
259            }
260            textlen++;
261        }
262    
263    
264        //// Instance variables for token queue and pragma parsing
265    
266        protected final TokenQueue lookaheadq = new TokenQueue();
267    
268        public /*@ non_null @*/ LexicalPragmaVec lexicalPragmas;
269    
270        //@ invariant inPragma ==> pragmaParser != null;
271        protected PragmaParser pragmaParser;
272        protected boolean inPragma;
273    
274    
275        //// Constructors
276    
277        /**
278         * Creates a lexical analyzer that will tokenize the characters
279         * read from an underlying <code>CorrelatedReader</code>.  Before
280         * the newly-created scanner can be used, its <code>restart</code>
281         * method must be called on a <code>CorrelatedReader</code>.  The
282         * <code>pragmaParser</code> object is used to parse pragmas out
283         * of comments; if it is <code>null</code>, all comments are
284         * discarded.  The <code>isJava</code> flag controls the initial
285         * set of keywords and punctuation strings; if <code>true</code>,
286         * the new scanner will recognize Java's keywords and punctuation
287         * strings, if <code>false</code>, the new scanner will recognize
288         * <em>no</em> keywords or punctuation strings.  If
289         * <code>isJava</code> is true, the token codes used for the
290         * Java's keywords and punctuation strings are those defined by
291         * the <code>TagConstants</code> class. 
292         */
293    
294        //@ ensures m_in==null;
295        public Lex(PragmaParser pragmaParser, boolean isJava) {
296            this.pragmaParser = pragmaParser;
297            lexicalPragmas = LexicalPragmaVec.make();
298            
299            inPragma = false;
300            if (isJava) {
301                addJavaKeywords();
302                addJavaPunctuation();
303            } else {
304                addPunctuation("/*", TagConstants.C_COMMENT);
305                addPunctuation("//", TagConstants.EOL_COMMENT);
306            }
307        }
308    
309    
310        /** Start scaning a new <code>CorrelatedReader</code>.  First closes
311         the old <code>CorrelatedReader</code> associated with
312         <code>this</code> (if there was one), and clears out the set of
313         collected lexical pragms.  In addition to (re)-seting the
314         underlying input stream, this method scans the first token,
315         returning the token kind of the result and setting the
316         <code>Token</code> fields of <code>this</code>.  If a
317         <code>CorrelatedReader</code> is already underlying
318         <code>this</code>, it is closed before the new reader is
319         installed.  Note: The argument <code>in</code> is "captured" in
320         the internal, private state of the resulting scanner and should
321         not be used by other parts of the program. */
322        //@ public normal_behavior
323        //@ modifies m_in, ttype, auxVal, identifierVal, lexicalPragmas;
324        //@ ensures m_in != null;
325        //@ also
326        //@ protected normal_behavior
327        //@ modifies m_nextchr;
328        public int restart(/*@ non_null @*/ CorrelatedReader in) {
329            close();
330            try {
331                m_in = in;
332                m_nextchr = m_in.read();
333            } catch (IOException e) {
334                ErrorSet.fatal(m_in.getLocation(), e.toString());
335                return TagConstants.NULL; // Dummy
336            }
337            return getNextToken();
338        }
339    
340    
341        /** Closes the <code>CorrelatedReader</code> underlying
342         <code>this</code>, clears the set of collected lexical pragmas,
343         and in other ways frees up resources associated with
344         <code>this</code>.  After being closed, a <code>Lex</code> object
345         can be restarted by calling <code>restart</code>.  (An IO
346         exception raised by closing the underlying input stream will be
347         converted into a <code>javafe.util.FatalError</code> runtime
348         exception.) */
349        //@ public normal_behavior
350        //@ modifies m_in, lexicalPragmas;
351        //@ ensures m_in==null;
352        //@ also 
353        //@ protected normal_behavior
354        //@ modifies inPragma;
355        public void close() {
356            if (m_in != null) {
357                m_in.close();
358            }
359            m_in = null;
360            lookaheadq.clear();
361            if (pragmaParser != null) {
362                pragmaParser.close();
363                lexicalPragmas = LexicalPragmaVec.make();
364            }
365            inPragma = false;
366        }
367    
368    
369    
370        //// Public members for scanning
371    
372        public void replaceLookaheadToken(int k, Token t) {
373            lookaheadq.setElementAt(k,t);
374        }
375    
376        /** Scans next token from input stream.  Returns the code of the
377         next token in the token stream and fills in the <code>Token</code>
378         fields of <CODE>this</CODE>.  Note that the
379         <code>startingLoc</code> and <code>endingLoc</code> fields of
380         <code>this</code> are not accurate for the end-of-file token. */
381    
382        //@ public normal_behavior
383        //@ requires m_in != null;
384        //@ modifies ttype, auxVal, identifierVal;
385        //@ also 
386        //@ protected normal_behavior
387        //@ modifies inPragma, m_nextchr;
388        public int getNextToken() {
389            if (lookaheadq.notempty) {
390                lookaheadq.dequeue(this);
391                return ttype;
392            }
393    
394            for(;;) {
395                if (inPragma)
396                    if (! pragmaParser.getNextPragma(this)) {
397                        inPragma = false;
398                    } else if (ttype != TagConstants.LEXICALPRAGMA) {
399                        return ttype;
400                    } else {
401                        lexicalPragmas.addElement((LexicalPragma)this.auxVal);
402                        continue;
403                    }
404    
405                int t = scanToken();
406                if (t != TagConstants.C_COMMENT &&
407                    t != TagConstants.EOL_COMMENT) {
408                    return t;
409                }
410                else { 
411                    scanComment(t);
412                }
413            }
414        }
415    
416        /** Returns token type of the <TT>k</TT>th future token, where k=0
417         is the current token.  If <code>k</code> is past the end of the
418         token stream, <code>TagConstants.EOF</code> is returned. */
419    
420        //@ requires k>=0;
421        //@ requires m_in != null;
422        public int lookahead(int k) {
423            if (k == 0) return ttype;
424            int lookahead_count = lookaheadq.size();
425    
426            if (lookahead_count < k) { // Need to add more tokens in lookahead buffer
427                this.copyInto(savedState);
428                try {
429                    for(int deficit = k - lookahead_count; 0 < deficit; deficit--) {
430                        for(;;) {
431                            if (inPragma)
432                                if (! pragmaParser.getNextPragma(this))
433                                    inPragma = false;
434                                else if (ttype != TagConstants.LEXICALPRAGMA)
435                                    break;
436                                else {
437                                    lexicalPragmas.addElement((LexicalPragma)this.auxVal);
438                                    continue;
439                                }
440    
441                            int t = scanToken();
442                            if (t != TagConstants.C_COMMENT
443                                && t != TagConstants.EOL_COMMENT)
444                                break;
445                            else scanComment(t);
446                        }
447                        lookaheadq.enqueue(this);
448                    }
449                } finally { 
450                    savedState.copyInto(this);
451                    savedState.clear();
452                }
453            }
454            return lookaheadq.elementAt(k-1).ttype;
455        }
456        //@ invariant savedState != null;
457        protected Token savedState = new Token();
458    
459        public Token lookaheadToken(int k) {
460            if (k==0) return this;
461            lookahead(k);
462            return lookaheadq.elementAt(k-1);
463        }
464    
465        /** Returns the set of lexical pragmas collected.  It also clears
466         the set of lexical pragmas so that the next call will not include
467         them.  (If this lexer has no <code>PragmaParser</code>, then an
468         empty vector is returned.) */
469    
470        //@ ensures \result != null;
471        public LexicalPragmaVec getLexicalPragmas() {
472            return lexicalPragmas.copy();
473        }
474    
475        /**
476         * Remove the first LexicalPragma from our set of lexical pragmas
477         * collected, returning it or null if our set is empty.
478         */
479        public LexicalPragma popLexicalPragma() {
480            if (lexicalPragmas.size()>0)
481                return lexicalPragmas.pop();
482    
483            return null;
484        }
485    
486    
487        //// Internal scanning routines
488    
489        /** Returns the code of the next token in the token stream, updating
490         the <code>Token</code> fields of <code>this</code> along the way.
491         Advances underlying stream to the character just past the last
492         character of the token returned, and changes the internal buffer
493         used by <code>getTokenText</code> to contain the text of this
494         token.
495    
496         <p> In most cases, this method leaves <code>m_nextchr</code>
497         holding the character just after the token scanned and
498         <code>m_in</code> pointing to the character after that.  However,
499         if <code>TagConstants.C_COMMENT</code> or
500         <code>TagConstants.EOL_COMMENT</code> is returned, it leaves
501         <code>m_in</code> pointing to the character just after the token
502         scanned and <code>m_nextchr</code> undefined.  This aids in pragma
503         processing. */
504    
505        //@ requires m_in != null;
506        private int scanToken() {
507            // The following "assert" is obvious from the program text, because
508            // the two arrays are non-null and their types do not coincide in any
509            // other way.  ESC/Java can figure this out, too, so therefore the
510            // following "assert" produces no warning.  This property is also
511            // cruicial to figuring out that the updates to "text[...]" in this
512            // body do not destroy the invariants declared in TagConstants about
513            // "punctuationCodes[...]".  However, oddly enough, without being
514            // prompted to first prove this property, ESC/Java isn't able to
515            // prove the necessary thing about the updates to "text[...]".
516            //@ assert (Object)text != (Object)TagConstants.punctuationCodes;
517    
518            try {
519                int nextchr = m_nextchr;
520                textlen = 0;
521    
522                while (Character.isWhitespace((char)nextchr)) {
523                    m_in.mark();
524                    nextchr = m_in.read();
525                }
526                startingLoc = m_in.getLocation();
527    
528                // Inline the identifier-handling code because it's the common case
529                if (Character.isJavaIdentifierStart((char)nextchr)) { 
530                    int h = 0;
531                    do {
532                        try {
533                            text[textlen] = (char)nextchr;
534                            textlen++;
535                        } catch (ArrayIndexOutOfBoundsException e) { this.append(nextchr);}
536                        h = _SpecialParserInterface.HC*h + nextchr;
537                        nextchr = m_in.read();
538                    } while (Character.isJavaIdentifierPart((char)nextchr));
539                    m_nextchr = nextchr;
540                    auxVal = null;
541                    identifierVal = _SpecialParserInterface.intern(text, textlen, h);
542                    if (onlyjavakeywords) {
543                        ttype = _SpecialParserInterface.getTokenType(identifierVal);
544                    } else if (keywords != null) {
545                        Object val = keywords.get(identifierVal);
546                        if (val != null) {
547                            ttype = ((Integer)val).intValue();
548                            // From the Unenforceable invariant on keyword:
549                            /*@ assume ttype != TagConstants.BOOLEANLIT &&
550                             ttype != TagConstants.INTLIT &&
551                             ttype != TagConstants.LONGLIT &&
552                             ttype != TagConstants.FLOATLIT &&
553                             ttype != TagConstants.DOUBLELIT &&
554                             ttype != TagConstants.STRINGLIT &&
555                             ttype != TagConstants.CHARLIT &&
556                             ttype != TagConstants.LEXICALPRAGMA &&
557                             ttype != TagConstants.MODIFIERPRAGMA &&
558                             ttype != TagConstants.STMTPRAGMA &&
559                             ttype != TagConstants.TYPEDECLELEMPRAGMA &&
560                             ttype != TagConstants.TYPEMODIFIERPRAGMA;  */
561                        } else if (javakeywords)
562                            ttype = _SpecialParserInterface.getTokenType(identifierVal);
563                        else ttype = TagConstants.IDENT;
564                    } else ttype = TagConstants.IDENT;
565                    return ttype;
566                }
567                ttype = Token.CLEAR;      // keep Token invariants happy
568                identifierVal = null;
569    
570                if (Character.isDigit((char)nextchr))
571                    return scanNumber(nextchr);
572                if (nextchr == '\'' || nextchr == '\"')
573                    return scanCharOrString(nextchr);
574    
575                scanJavaExtensions(nextchr);
576                if (ttype != TagConstants.NULL) return ttype;
577    
578                scanPunctuation(nextchr);
579                if (ttype != TagConstants.NULL) return ttype;
580    
581                if (nextchr == -1) {
582                    m_nextchr = nextchr;
583                    return ttype = TagConstants.EOF;
584                }
585    
586                String s = PrettyPrint.toCanonicalString(TagConstants.CHARLIT, 
587                                                         new Integer(nextchr));
588                ErrorSet.fatal(m_in.getLocation(), "Unexpected character " + s);
589    
590                //@ unreachable;
591                return ttype;
592    
593            } catch (IOException e) {
594                ErrorSet.fatal(m_in.getLocation(), e.toString());
595                return TagConstants.NULL; // Dummy
596            }
597        }
598    
599    
600        /** Handle a comment.  m_in points to the character just after the
601         "//" or "/*".  The mark is set at the last character read.  */
602    
603        //@ requires m_in != null;
604        //@ requires !inPragma;
605        //@ modifies inPragma, m_nextchr;
606        private void scanComment(int commentKind) {
607            try {
608                m_in.mark();
609                int firstchr = m_in.read();
610                // System.out.println("scanComment: firstchr = '"+(char)firstchr+"'");
611    
612                /* Decide if this comment contains a pragma. */
613    
614                if (pragmaParser != null) {
615                    if (pragmaParser.checkTag(firstchr)) {
616                        Assert.notFalse(! inPragma);
617                        inPragma = true;
618                    }
619                }
620    
621                // Scan the comment
622                boolean nonEmptyComment = false; // For empty multi-line comment
623          
624                int locStartComment = m_in.getLocation();
625                int nextchr = firstchr;
626                if (commentKind == TagConstants.EOL_COMMENT) {
627                    nonEmptyComment = true;
628                    while (nextchr != -1 && nextchr != '\n')
629                        nextchr = m_in.read();
630                } else {
631                    for(;;) {
632                        if (nextchr == -1) {
633                            ErrorSet.fatal(
634                               startingLoc,
635                               "Unterminated or improperly nested comment or pragma");
636                        }
637                        int oldchr = nextchr;
638                        nextchr = m_in.read();
639                        if (oldchr == '*' && nextchr == '/') break;
640                        nonEmptyComment = true;
641                    }
642                }
643    
644                // If the comment contains a pragma, set up the pragma parser
645                if (inPragma) {
646                    if(nonEmptyComment) {
647                        boolean eolComment = commentKind == TagConstants.EOL_COMMENT;
648                        int discard = !eolComment ? 2 :
649                            nextchr == '\n' ? 1 : 0;
650                        CorrelatedReader nu = m_in.createReaderFromMark(discard);
651                        pragmaParser.restart(nu, eolComment);
652                    } else {
653                        // Is an empty multi-line comment
654                        inPragma = false;
655                    }
656                }
657                if (!inPragma) {
658                    m_in.clearMark();
659                }
660    
661                // Clean up and return
662                m_nextchr = m_in.read();
663            } catch (IOException e) {
664                m_in.clearMark();
665                ErrorSet.fatal(m_in.getLocation(), "IO error");
666            }
667        }   //@ nowarn Exception;   // ignore Index... from createReaderFromMark
668    
669    
670        // Notes.  The routines below all assume that the input stream to be
671        // scanned consist of their input parameter nextchr plus the characters
672        // in m_in; that is, the nextchr argument is playing the role that
673        // m_nextchr plays between calls to getNextToken.  These routines also
674        // assume that textlen = 0 and startingLoc has already been filled in
675        // with the location of the character in nextchr.
676        // Unless otherwise noted, the routines below fill in all fields except
677        // startingLoc, that is, they fill in m_nextchr, text & textlen,
678        // plus the Token fields ttype, endingLoc, and auxVal.
679        // Errors are handled internally by these routines.  Either the
680        // error is fatal, or the error is recoverable and recovery is done
681        // by the routines.
682    
683        /** Scan a character or string constant. */
684    
685        //@ requires m_in != null;
686        private int scanCharOrString(int nextchr) {
687            try {
688                int endquote = nextchr;
689                this.append(endquote);
690                boolean chr = (endquote == '\'');
691                nextchr = m_in.read();
692                stringLitLen = 0;
693                for(;;) {
694                    if (nextchr != '\\' && nextchr != '\n' && nextchr != -1) {
695                        // Normal case
696                        this.append(nextchr);
697                        if (nextchr == endquote) { nextchr = m_in.read(); break; }
698                        this.stringLitAppend(nextchr);
699                        nextchr = m_in.read();
700                        continue;
701                    }
702    
703                    if (nextchr == -1 || nextchr == '\n') {
704                        if (chr) ErrorSet.fatal(startingLoc,
705                                                "Unterminated character literal");
706                        else ErrorSet.fatal(startingLoc, "Unterminated string literal");
707                        break;
708                    }
709    
710                    // At this point, we know we're at the start of an escape sequence
711                    this.append(nextchr);
712                    nextchr = m_in.read();
713                    if ('0' <= nextchr && nextchr <= '7') { // Octal sequence
714                        int firstdigit = nextchr - '0';
715                        int result = firstdigit;
716                        this.append(nextchr);
717                        nextchr = m_in.read();
718                        if ('0' <= nextchr && nextchr <= '7') {
719                            this.append(nextchr); result = 8*result + (nextchr - '0');
720                            nextchr = m_in.read();
721                            if (0 <= firstdigit && firstdigit <= 3
722                                && '0' <= nextchr && nextchr <= '7') {
723                                this.append(nextchr); result = 8*result + (nextchr - '0');
724                                nextchr = m_in.read();
725                            }
726                        }
727                        if (255 < result) {
728                            ErrorSet.error(m_in.getLocation(),
729                                           "Octal escape sequence overflow");
730                            this.stringLitAppend('\0');
731                        } else this.stringLitAppend(result);
732                    } else {
733                        if (nextchr == 'b') this.stringLitAppend('\b');
734                        else if (nextchr == 't')  this.stringLitAppend('\t');
735                        else if (nextchr == 'n')  this.stringLitAppend('\n');
736                        else if (nextchr == 'f')  this.stringLitAppend('\f');
737                        else if (nextchr == 'r')  this.stringLitAppend('\r');
738                        else if (nextchr == '\"') this.stringLitAppend('\"');
739                        else if (nextchr == '\'') this.stringLitAppend('\'');
740                        else if (nextchr == '\\') this.stringLitAppend('\\');
741                        else {
742                            ErrorSet.error(startingLoc, "Bad escape sequence");
743                            continue;
744                        }
745                        this.append(nextchr);
746                        nextchr = m_in.read();
747                    }
748                }
749    
750                if (chr) {
751                    if (1 < stringLitLen)
752                        ErrorSet.error(startingLoc,
753                                       "Character literal with multiple characters.");
754                    else if (stringLitLen == 0)
755                        if (nextchr == '\'') {
756                            this.append(nextchr);
757                            nextchr = m_in.read();
758                            stringLit[0] = '\'';
759                            ErrorSet.error(startingLoc, "Unquoted \' in character literal.");
760                        } else {
761                            stringLit[0] = '\0';
762                            ErrorSet.error(startingLoc, "Empty character literal.");
763                        }
764                    ttype = TagConstants.CHARLIT;
765                    auxVal = new Integer(stringLit[0]);  // CF
766                } else {
767                    ttype = TagConstants.STRINGLIT;
768                    auxVal = String.valueOf(stringLit, 0, stringLitLen);
769                }
770                m_nextchr = nextchr;
771                endingLoc = m_in.getLocation();
772                return ttype;
773            } catch (IOException e) {
774                ErrorSet.fatal(m_in.getLocation(), e.toString());
775                return TagConstants.NULL; // Dummy
776            }
777        }
778    
779    
780        /** Scans a numeric literal.  Requires <code>nextchr</code> is a
781         decimal digit.  Reads a numeric literal into <code>text</code>.
782         Depending on the kind of literal found, will return one of
783         <code>TagConstants.INTLIT</code>,
784         <code>TagConstants.LONGLIT</code>,
785         <code>TagConstants.FLOATLIT</code> or
786         <code>TagConstants.DOUBLELIT</code>.  If an error is detected, a
787         message is sent to <code>ErrorSet</code>, <code>m_in</code> is
788         advanced to what appears to be the end of the erroneous token, and
789         a legal literal is left in <code>text</code>. */
790    
791        //@ requires m_in != null;
792        //@ modifies endingLoc, ttype, auxVal, m_nextchr, text, textlen;
793        //@ ensures \result==ttype;
794        /*@ ensures \result==TagConstants.INTLIT || \result==TagConstants.LONGLIT ||
795         \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT ||
796         \result==TagConstants.MAX_INT_PLUS_ONE ||
797         \result==TagConstants.MAX_LONG_PLUS_ONE ; */
798        private int scanNumber(int nextchr) {
799            try {
800                // Get the first chunk of digits
801                int tentativeResult = 0;
802                while (Character.isDigit((char)nextchr)) {
803                    this.append(nextchr);
804                    tentativeResult = 10*tentativeResult + (nextchr - '0');
805                    nextchr = m_in.read();
806                }
807                //@ assume textlen>0; // because know nextchar was initially a digit...
808    
809                // Handle floating point literals in another subtroutine
810                if (nextchr == '.') {
811                    m_in.mark();
812                    int nextnextchr = m_in.read();
813                    if (nextnextchr == '.') {
814                        // Two . in a row - cannot be a double so
815                        // back up 
816                        m_in.reset();
817                        // fall through and let the text so far be handled as
818                        // a integer literal
819                    } else {
820                        this.append(nextchr);
821                        return finishFloatingPointLiteral(nextnextchr);
822                    }
823                } else if (nextchr == 'e' || nextchr == 'E'
824                           || nextchr == 'F' || nextchr == 'f'
825                           || nextchr == 'd' || nextchr == 'D')
826                    return finishFloatingPointLiteral(nextchr);
827    
828                long result = 0;
829                if (text[0] != '0' || (textlen == 1 && nextchr != 'x' && nextchr != 'X'))
830                    // Handle base-ten literal
831                    if (textlen <= 9) result = tentativeResult;
832                    else {
833                        // Parse as negative value to avoid problems w/ -Long.MIN_VALUE
834                        long mullimit = Long.MIN_VALUE / 10;
835                        for(int i = 0; i < textlen; i++) {
836                            int d = text[i] - '0';
837                            long r2 = 10*result;
838                            if (result < mullimit || r2 < Long.MIN_VALUE + d) {
839                                ErrorSet.error(startingLoc, "Integer literal overflow");
840                                result = 0;
841                                break;
842                            } else result = r2 - d;
843                        }
844                        if (nextchr == 'L' || nextchr == 'l') {
845                            if (result == Long.MIN_VALUE) {
846                                this.append(nextchr);
847                                m_nextchr = m_in.read();
848                                endingLoc = m_in.getLocation();
849                                auxVal = null;
850                                return ttype = TagConstants.MAX_LONG_PLUS_ONE;
851                            }
852                        } else if (result == Integer.MIN_VALUE) {
853                            m_nextchr = nextchr;
854                            endingLoc = m_in.getLocation();
855                            auxVal = null;
856                            return ttype = TagConstants.MAX_INT_PLUS_ONE;
857                        }
858                        result = -result;
859                    }
860    
861                else if (nextchr != 'x' && nextchr != 'X') {
862                    // Handle octal literal
863                    for(int i = 1; i < textlen; i++) {
864                        int d = text[i] - '0';
865                        if (0 <= d && d <= 7) {
866                            if ((result >>> 61) != 0) {
867                                ErrorSet.error(startingLoc, "Integer literal overflow");
868                                result = 0;
869                                break;
870                            }
871                            result = (result << 3) + d;
872                        } else {
873                            ErrorSet.error(startingLoc,
874                                           "Non-octal digit found in octal literal.");
875                            result = 0;
876                            break;
877                        }
878                    }
879    
880                } else {
881                    // Handle hex literal
882                    this.append(nextchr);
883                    boolean overflow = false;
884                    for(nextchr = m_in.read(); ; nextchr = m_in.read()) {
885                        int d = Character.digit((char)nextchr, 16);
886                        if (d == -1) break;
887                        this.append(nextchr);
888                        if (! overflow)
889                            if ((result >>> 60) != 0) overflow = true;
890                            else result = (result << 4) + d;
891                    }
892                    if (textlen <= 2)
893                        ErrorSet.error(startingLoc, "Too few digits in a hex literal.");
894                    else if (overflow) {
895                        ErrorSet.error(startingLoc, "Integer literal overflow");
896                        result = 0;
897                    }
898                }
899    
900                if (nextchr == 'L' || nextchr == 'l') {
901                    this.append(nextchr);
902                    m_nextchr = m_in.read();
903                    endingLoc = m_in.getLocation();
904                    auxVal = new Long(result);
905                    return ttype = TagConstants.LONGLIT;
906                }
907                if ((result & 0xffffffff00000000L) != 0) {
908                    ErrorSet.error(startingLoc, "Integer literal overflow");
909                    result = 0;
910                }
911                m_nextchr = nextchr;
912                endingLoc = m_in.getLocation();
913                auxVal = new Integer((int)result);
914                return ttype = TagConstants.INTLIT;
915            } catch (IOException e) {
916                ErrorSet.fatal(m_in.getLocation(), e.toString());
917                return TagConstants.NULL; // Dummy
918            }
919        }
920    
921        /** Finishes scanning a floating-point literal.
922    
923         <p> Requires: <code>text</code> contains a possibly empty sequence
924         of decimal digits followed by an optional <code>'.'</code>; also,
925         <code>text</code> cannot be empty.  Further, let <i>s</i> be the
926         sequence of characters consisting of the characters in
927         <code>text</code> followed by <code>nextchr</code> followed by the
928         characters in <code>m_in</code>.  This routine requires that a
929         prefix of <i>s</i> match the syntax of floating-point literals as
930         defined by the Java language specification.
931    
932         <p> Ensures: Scans the floating-point literal in <i>s</i>.
933         Depending on the type of the literal, returns
934         <code>TagConstants.FLOATLIT</code> or
935         <code>TagConstants.DOUBLELIT</code> and sets sets
936         <code>auxVal</code> to either a <code>Float</code> or
937         <code>Double</code>.  If an error is encountered, a message is
938         sent to <code>ErorrSet</code> and recovery is performed. */
939    
940        //@ requires m_in != null;
941        //@ requires textlen>0;
942        //@ modifies ttype, auxVal, m_nextchr, endingLoc, text, textlen;
943        //@ ensures \result==ttype;
944        //@ ensures \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT;
945        private int finishFloatingPointLiteral(int nextchr) {
946            try {
947                boolean error = false;
948                boolean zeroMantissa = true; // Used for underflow detection
949    
950                // First, see if there are any non-zero digits in the non-fractional
951                // part of the mantissa
952                for(int i = 0, len = textlen - (text[textlen-1] == '.' ? 1 : 0);
953                    i < len; i++)
954                    if (text[i] != '0') { zeroMantissa = false; break; }
955    
956                // If there's a fractional part, scan it in
957                while (Character.isDigit((char)nextchr)) {
958                    if (nextchr != '0') zeroMantissa = false;
959                    this.append(nextchr);
960                    nextchr = m_in.read();          // throws IOException
961                }
962    
963                // If there's an exponent, scan it in
964                if (nextchr == 'e' || nextchr == 'E') {
965                    this.append(nextchr);
966                    nextchr = m_in.read();
967                    if (nextchr == '+' || nextchr == '-') {
968                        this.append(nextchr);
969                        nextchr = m_in.read();
970                    }
971                    while (Character.isDigit((char)nextchr)) {
972                        this.append(nextchr);
973                        nextchr = m_in.read();
974                    }
975                    if (! Character.isDigit(text[textlen-1])) {
976                        ErrorSet.error(startingLoc, ("Digits required in exponent part"
977                                                     + " of floating-point literal."));
978                        error = true;
979                    }
980                }
981    
982                String s = (error ? "1.0" : String.valueOf(this.text, 0, textlen));
983    
984                // Assert: s has one of the following forms:
985                //   [0-9]+ (. [0-9]*)? (e [+-] [0-9]+)?
986                //   . [0-9]+ (e [+-] [0-9]+)?
987                // Also, zeroMantissa is true iff all digits in the mantissa are zero
988                // (Todo: examine code more closely to see if this is really true.)
989    
990                boolean f = (nextchr == 'F' || nextchr == 'f');
991                if (f || nextchr == 'D' || nextchr == 'd') {
992                    this.append(nextchr);
993                    m_nextchr = m_in.read();
994                } else m_nextchr = nextchr;
995                endingLoc = m_in.getLocation();
996    
997                if (f) {
998                    Float result = Float.valueOf(s);  // !throw NumberFormatException
999                    auxVal = result;
1000                    if (result.isInfinite())
1001                        ErrorSet.error(startingLoc, "Floating-point literal overflow");
1002                    else if (! zeroMantissa && result.floatValue() == 0.0)
1003                        ErrorSet.error(startingLoc, "Floating-point literal underflow");
1004                } else {
1005                    Double result = Double.valueOf(s);  // !throw NumberFormatException
1006                    auxVal = result;
1007                    if (result.isInfinite())
1008                        ErrorSet.error(startingLoc, "Floating-point literal overflow");
1009                    else if (! zeroMantissa && result.doubleValue() == 0.0D)
1010                        ErrorSet.error(startingLoc, "Floating-point literal underflow");
1011                }   
1012                return ttype = (f ? TagConstants.FLOATLIT : TagConstants.DOUBLELIT);
1013            } catch (IOException e) {
1014                ErrorSet.fatal(m_in.getLocation(), e.toString());
1015                return TagConstants.NULL; // Dummy
1016            }
1017        }   //@ nowarn Exception;   // NumberFormatException won't be thrown
1018    
1019        /** Scans a punctuation string <em>or</em> a floating-point
1020         number.  If input doesn't match either a floating-point number or
1021         any punctuation, returns <code>TagConstants.NULL</code>.  Assumes
1022         <code>startingLoc</code> already filled in.
1023    
1024         <p>The routine may change the mark arbitrarily.
1025    
1026         <p> This method leaves <code>m_in</code> in a different state than
1027         the previous ones do.  Ordinarily, <code>scanXXX</code> routines
1028         return with <code>m_nextchr</code> holding the character just
1029         after the token scanned and <code>m_in</code> pointing to the
1030         character after that.  <code>scanPunctuation</code> does too, but
1031         only when the value returned is <em>not</em>
1032         <code>TagConstants.C_COMMENT</code> or
1033         <code>TagConstants.EOL_COMMENT</code>; in those two cases, it
1034         returns with <code>m_in</code> pointing to the character just
1035         after the token scanned and <code>m_nextchr</code> undefined.
1036         This aids in pragma processing.  Also, if <code>TagConstants.NULL</code>
1037         is returned, then <code>m_nextchr</code> is undefined and
1038         <code>m_in</code> is where it was on entry.
1039         */
1040    
1041        //@ requires m_in != null;
1042        //@ modifies ttype, auxVal, m_nextchr, textlen, endingLoc, text;
1043        //@ modifies m_in.marked;
1044        //@ ensures \result==ttype;
1045        private int scanPunctuation(int nextchr) {
1046            try {
1047                boolean possibleFloatingPointNumber = (nextchr == '.');
1048                text[0] = (char)nextchr;
1049                textlen = 1;
1050                PunctuationPrefixTree prefix = punctuationTable;
1051                PunctuationPrefixTree lastPunctuation = prefix;
1052                int lastPunctuationLength = 0;
1053                m_in.mark();
1054    
1055                // The following loop, which has been unrolled once so the
1056                // first iteration can be handled specially, has the invariant:
1057                //   prefix == null || depth(prefix) == textlen-1
1058                //   nextchr == text[textlen-1]
1059                //   (nextchr_0 + m_in_0) == (text[0,textlen) + m_in)
1060                //   lastPunctuation != null && lastPunctuation is last valid punct seen
1061                //   lastPunctuationLength == depth(lastPunctuation)
1062                //   (nextchr_0+m_in_0)==(text[0,lastPunctuationLength]+m_in.resetedtext)
1063                // Stating part of this invariant graphically:
1064                //               l      t       l = lastPunctuationLength; t = textlen
1065                //    text:   abcdefghij
1066                // nextchr_0+
1067                //    m_in_0: abcdefghijklmnop
1068                //               r     nm       r = resetted text, n = nextchr, m = m_in
1069    
1070                int index = nextchr - '!';
1071                if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1072                    prefix = null;
1073                else
1074                    prefix = prefix.children[nextchr - '!'];
1075                if (prefix != null && prefix.code != TagConstants.NULL) {
1076                    lastPunctuation = prefix;
1077                    lastPunctuationLength = textlen;
1078                    m_in.mark();
1079                }
1080                nextchr = m_in.read();
1081                if (possibleFloatingPointNumber && Character.isDigit((char)nextchr)) {
1082                    // Deal with special case of floating-point numbers
1083                    m_in.clearMark();
1084                    return finishFloatingPointLiteral(nextchr);
1085                }
1086                this.append(nextchr);
1087                while (prefix != null) {
1088                    index = nextchr - '!';
1089                    if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index)
1090                        prefix = null;
1091                    else
1092                        prefix = prefix.children[nextchr - '!'];
1093                    if (prefix != null && prefix.code != TagConstants.NULL) {
1094                        lastPunctuation = prefix;
1095                        lastPunctuationLength = textlen;
1096                        m_in.mark();
1097                    }
1098                    nextchr = m_in.read();
1099                    this.append(nextchr);
1100                }
1101                // End of unrolled loop
1102    
1103                m_in.reset();
1104                textlen = lastPunctuationLength;
1105                endingLoc = m_in.getLocation();
1106                ttype = lastPunctuation.code;
1107                if (ttype != TagConstants.C_COMMENT &&
1108                    ttype != TagConstants.EOL_COMMENT &&
1109                    ttype != TagConstants.NULL) {
1110                    m_nextchr = m_in.read();
1111                }
1112                return ttype;
1113            } catch (IOException e) {
1114                ErrorSet.fatal(m_in.getLocation(), e.toString());
1115                return TagConstants.NULL; // Dummy
1116            }
1117        }
1118    
1119        /** Scans a Java extension.  If input doesn't match any Java extension,
1120         returns <code>TagConstants.NULL</code>.  Assumes <code>startingLoc</code>
1121         already filled in, and assumes <code>textlen</code> is 0.
1122    
1123         <p>The routine may change the mark arbitrarily.
1124    
1125         <p> If a Java extension is matched, returns with <code>m_nextchr</code>
1126         holding the character just after the token scanned and <code>m_in</code>
1127         pointing to the character after that.
1128         */
1129        //@ requires m_in != null;
1130        //@ requires textlen == 0;
1131        //@ modifies ttype, auxVal, m_nextchr;
1132        //@ modifies m_in.marked;
1133        //@ ensures \result==ttype;
1134        protected int scanJavaExtensions(int nextchr) {
1135            ttype = TagConstants.NULL;
1136            return ttype;
1137        }
1138    
1139        //// Internal char buffer used by string scanner.
1140    
1141        // Having this separate from text allows us to accumulate both the
1142        // original input in text and the escape-converted input in stringLit
1143    
1144        //@ private invariant stringLit != null;
1145        //@ private invariant stringLit.length>=64;
1146        private char[] stringLit = new char[64];
1147    
1148        //@ private invariant stringLitLen>=0;
1149        private int stringLitLen = 0;
1150    
1151        private void stringLitAppend(int c) {
1152            try {
1153                stringLit[stringLitLen] = (char)c;    //@ nowarn IndexTooBig; // caught
1154            } catch (ArrayIndexOutOfBoundsException e) {
1155                char[] newstringLit = new char[stringLitLen + 128];
1156                System.arraycopy(stringLit, 0, newstringLit, 0, stringLitLen);
1157                stringLit = newstringLit;
1158                stringLit[stringLitLen] = (char)c;
1159            }
1160            stringLitLen++;
1161        }
1162    
1163    
1164        //// The keyword and punctuation tables, plus methods for changing them
1165    
1166        //@ private invariant punctuationTable != null;
1167        private PunctuationPrefixTree punctuationTable = new PunctuationPrefixTree();
1168    
1169        /**
1170         * Unenforceable invariant: all tokenTypes in this table do not
1171         * require a non-null auxVal.  (cf. Token.auxVal).
1172         */
1173        //  Old specs from original full JML spec files.  Must be
1174        //  rewritten for current java.util.Hashtable specs.
1175        /*  invariant (keywords != null) ==> (keywords.keyType == \type(Identifier) &&
1176                      keywords.elementType == \type(Integer)); */
1177        protected Hashtable keywords = null;
1178    
1179        protected boolean javakeywords = false, onlyjavakeywords = false;
1180        //@ invariant onlyjavakeywords == (javakeywords && keywords == null);
1181    
1182        static {
1183            // When class initializes, change the <CODE>tokenType</CODE> field
1184            // of <CODE>Identifier</CODE> instances associated with keywords.
1185            for(int code = TagConstants.FIRST_KEYWORD;
1186                code <= TagConstants.LAST_KEYWORD;
1187                code++) {
1188                Identifier idn = Identifier.intern(TagConstants.toString(code));
1189                _SpecialParserInterface.setTokenType(idn, code);
1190            }
1191        } 
1192    
1193        /** Add all of Java's keywords to the scanner.  The token codes used
1194         for these keywords are those defined by the <code>TagConstants</code>
1195         class.  Requires that none of these keywords have been added
1196         already. */
1197    
1198        public void addJavaKeywords() {
1199            javakeywords = true;
1200            if (keywords != null)
1201                // New values override existing ones...
1202                for(int i = TagConstants.FIRST_KEYWORD;
1203                    i <= TagConstants.LAST_KEYWORD;
1204                    i++)
1205                    keywords.remove(Identifier.intern(TagConstants.toString(i)));
1206            else onlyjavakeywords = true;
1207        }
1208    
1209    
1210        /**
1211         * Add a keyword to a <code>Lex</code> object with the given code.
1212         * Requires that <code>newkeyword</code> is a Java identifier and
1213         * that <code>code</code> is not <code>TagConstants.NULL</code> or
1214         * a tokenType that requires auxVal to be non-null;
1215         * (cf. Token.auxVal).
1216         *
1217         * Also requires that the keyword hasn't already been added.
1218         */
1219        //@ requires code != TagConstants.NULL;
1220        //@ requires code != TagConstants.BOOLEANLIT;
1221        //@ requires code != TagConstants.INTLIT;
1222        //@ requires code != TagConstants.LONGLIT;
1223        //@ requires code != TagConstants.DOUBLELIT;
1224        //@ requires code != TagConstants.STRINGLIT;
1225        //@ requires code != TagConstants.CHARLIT;
1226        //@ requires code != TagConstants.LEXICALPRAGMA;
1227        //@ requires code != TagConstants.MODIFIERPRAGMA;
1228        //@ requires code != TagConstants.STMTPRAGMA;
1229        //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1230        //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1231        public void addKeyword(/*@ non_null @*/ String newkeyword, int code) {
1232            Assert.precondition(code != TagConstants.NULL);
1233            if (keywords == null) {
1234                keywords = new Hashtable();
1235                //  Old specs from original full JML spec files.  Must be
1236                //  rewritten for current java.util.Hashtable specs.
1237                //  set keywords.keyType = \type(Identifier);
1238                //  set keywords.elementType = \type(Integer);
1239            }
1240            keywords.put(Identifier.intern(newkeyword), new Integer(code));
1241            onlyjavakeywords = false;
1242        }
1243    
1244    
1245    
1246        /** Add all of Java's punctuation strings to the scanner.  The codes
1247         used for these punctuation strings are the found in the
1248         <code>TagConstants</code> class.  Requires that none of these
1249         punctuation strings have been added before. */
1250    
1251        public void addJavaPunctuation() {
1252            Assert.notFalse(TagConstants.punctuationStrings.length ==
1253                            TagConstants.punctuationCodes.length);
1254            for(int i = 0; i < TagConstants.punctuationStrings.length; i++)
1255                addPunctuation(TagConstants.punctuationStrings[i],
1256                               TagConstants.punctuationCodes[i]);
1257        }
1258    
1259        /** Add a punctuation string to a scanner associated with a given
1260         code.  Requires that the characters in the punctuation string are
1261         all punctuation characters, that is, non-alphanumeric ASCII
1262         characters whose codes are between 33 ('!')  and 126 ('~')
1263         inclusive.  Also requires that the code is not
1264         <code>TagConstants.NULL</code> and that the punctuation string
1265         hasn't already been added. */
1266        //@ requires punctuation != null;
1267        //@ requires code != TagConstants.NULL;
1268        //@ requires code != TagConstants.IDENT;
1269        //@ requires code != TagConstants.BOOLEANLIT;
1270        //@ requires code != TagConstants.INTLIT;
1271        //@ requires code != TagConstants.LONGLIT;
1272        //@ requires code != TagConstants.FLOATLIT;
1273        //@ requires code != TagConstants.DOUBLELIT;
1274        //@ requires code != TagConstants.STRINGLIT;
1275        //@ requires code != TagConstants.CHARLIT;
1276        //@ requires code != TagConstants.LEXICALPRAGMA;
1277        //@ requires code != TagConstants.MODIFIERPRAGMA;
1278        //@ requires code != TagConstants.STMTPRAGMA;
1279        //@ requires code != TagConstants.TYPEDECLELEMPRAGMA;
1280        //@ requires code != TagConstants.TYPEMODIFIERPRAGMA;
1281        public void addPunctuation(String punctuation, int code) {
1282            Assert.precondition(code != TagConstants.NULL);
1283            PunctuationPrefixTree prefix = punctuationTable;
1284            for(int j = 0; j < punctuation.length(); j++) {
1285                int c = punctuation.charAt(j);
1286                Assert.precondition(        //@ nowarn Pre;
1287                                    ('!' <= c && c <= '/') || (':' <= c && c <= '@')
1288                                    || ('[' <= c && c <= '`') || ('{' <= c && c <= '~'));
1289                int index = c - '!';
1290                PunctuationPrefixTree child = prefix.children[index];
1291                if (child == null)
1292                    child = prefix.children[index] = new PunctuationPrefixTree();
1293                prefix = child;
1294            }
1295            prefix.code = code;
1296        }
1297    
1298    
1299        //// Diagnostic routines
1300    
1301        /** Checks invariants (assumes that <CODE>Token</CODE> fields
1302         haven't been mucked with by outside code).  <code>prefix</code> is
1303         used to prefix error messages with context provided by the
1304         caller. */
1305    
1306        public void zzz(String prefix) {
1307            super.zzz();
1308            lookaheadq.zzz("Near character "
1309                           + Location.toOffset(startingLoc)
1310                           + ": " + prefix);
1311        }
1312    }
1313    
1314    class PunctuationPrefixTree {
1315        public static final int CHILDLEN = 1 + '~' - '!';
1316    
1317        /*@ invariant code != TagConstants.IDENT &&
1318         code != TagConstants.BOOLEANLIT &&
1319         code != TagConstants.INTLIT &&
1320         code != TagConstants.LONGLIT &&
1321         code != TagConstants.FLOATLIT &&
1322         code != TagConstants.DOUBLELIT &&
1323         code != TagConstants.STRINGLIT &&
1324         code != TagConstants.CHARLIT &&
1325         code != TagConstants.LEXICALPRAGMA &&
1326         code != TagConstants.MODIFIERPRAGMA &&
1327         code != TagConstants.STMTPRAGMA &&
1328         code != TagConstants.TYPEDECLELEMPRAGMA &&
1329         code != TagConstants.TYPEMODIFIERPRAGMA; */
1330        public int code = TagConstants.NULL; // ! NULL ==> a punctuation string
1331    
1332        //@ invariant children != null;
1333        //@ invariant children.length == CHILDLEN;
1334        //@ invariant \typeof(children) == \type(PunctuationPrefixTree[]);
1335        public PunctuationPrefixTree[] children
1336                = new PunctuationPrefixTree[CHILDLEN];
1337    }