001 /* Copyright 2000, 2001, Compaq Computer Corporation */ 002 003 package javafe.parser; 004 005 import java.io.IOException; 006 import java.util.Hashtable; 007 008 import javafe.ast.Identifier; 009 import javafe.ast.PrettyPrint; 010 import javafe.ast.LexicalPragma; 011 import javafe.ast.LexicalPragmaVec; 012 import javafe.ast._SpecialParserInterface; 013 014 import javafe.util.Assert; 015 import javafe.util.ErrorSet; 016 import javafe.util.CorrelatedReader; 017 import javafe.util.Location; 018 019 /** 020 021 A <TT>Lex</TT> object generates a sequence of Java "input elements" 022 (that is, tokens) by converting the sequence of input characters and 023 line terminators generated by an underlying 024 <code>CorrelatedReader</code>. 025 026 <p> The conversion of input characters occurs according to the lexical 027 rules in Chapter Three of the <cite>Java Language 028 Specification</cite>. This specification describes three lexical 029 translation steps: the first two steps translate a raw Unicode input 030 stream into a "cooked" one in which Unicode escape characters from the 031 raw stream have been processed and in which line terminators have been 032 identified; the last step translates this cooked stream into a 033 sequence of Java "input elements" (comments, white space, identifiers, 034 tokens, literals, and punctuation). <code>Lex</code> objects perform 035 the last of these translations steps; the first two are performed by 036 an underlying <code>CorrelatedReader</code> that is given to the 037 <code>Lex</code> object as the source of input characters. 038 039 <p> Before a newly-created <code>Lex</code> object can be used, its 040 <code>restart</code> method must be called giving a 041 <code>CorrelatedReader</code> to scan. At any point, a 042 <code>Lex</code> can be restarted on a different underlying reader. 043 044 <p> The <TT>Lex</TT> class is thread safe, but instances of 045 <TT>Lex</TT> are not. That is, two different threads may safely 046 access two different instances of <TT>Lex</TT> concurrently, but they 047 may not access the same instance of <TT>Lex</TT> concurrently. 048 049 050 <h3> Simple scanning </h3> 051 052 <p> The <code>getNextToken</code> method of <code>Lex</code> objects 053 returns the translated token sequence, one token at a time. It 054 discards white space, and it processes comments as described below. 055 In addition, <code>getNextToken</code> fills in the <code>Token</code> 056 fields of <code>this</code> (<code>Lex</code> is a subclass of 057 <code>Token</code>); for example, <code>ttype</code> gets an integer 058 code defining the type of token returned and <code>startingLoc</code> 059 gives the location of the first character making up the token. If the 060 token is an identifier, <code>identifierVal</code> indicates which 061 one; if the token is a literal, <code>auxVal</code> gives its value. 062 063 <p> <code>Lex</code> objects report errors by calling both the 064 <code>fatal</code> and <code>error</code> methods of 065 <code>ErrorSet</code>. The fatal errors are unexpected characters, 066 unterminated comments and string and character literals, and IO errors 067 in the underlying input stream. Recoverable errors are overflows in 068 literals (including overflows in octal escape sequences), non-octal 069 digits in integer literals, the string <code>0x</code> (interpreted as 070 a malformed integer literal), missing digits in a floating-point 071 exponent, bad escape sequences in character and string literals, 072 character literals containing no or multiple characters, and the 073 character literal <code>'''</code> (interpreted as <code>'\''</code>). 074 075 076 <h3> Lookahead </h3> 077 078 <code>Lex</code> objects allow their clients to peek ahead into the 079 token stream by calling <code>lookahead</code>. This method returns 080 the token code for the future token, but it does not affect the 081 <code>Token</code> fields of <code>this</code>. 082 083 <p> If a call to <code>lookahead</code> needs to look past the set of 084 tokens already scanned, and those tokens have errors, then the errors 085 are reported immediately. 086 087 088 <h3> Extensibility: Keywords, punctuation </h3> 089 090 <p> A keyword is a Java identifier with a special token code. 091 Ordinarily, identifiers are associated with the code 092 <code>TagConstants.IDENT</code>. Keywords, while matching the lexical 093 grammar of identifiers, are associated with different codes. In fact, 094 each keyword is typically associated with its own code. 095 096 <p> The set of identifiers that a <code>Lex</code> object recognizes 097 as keywords is extensible. A keyword is added to a <code>Lex</code> 098 object by calling the <code>addKeyword</code> method. As a 099 convenience, a boolean given to the <code>Lex</code> constructor 100 indicates whether a newly-constructed <code>Lex</code> object should 101 automatically have all Java keywords added to it. 102 103 <p> A punctuation string is a string of punctuation characters 104 recognized by a <code>Lex</code> object to be a token. (Punctuation 105 characters are non-alphanumeric ASCII characters whose ASCII codes are 106 between 33 ('!') and 126 ('~') inclusive.) As with keywords, the set 107 of punctuation strings recognized by a <code>Lex</code> object is 108 extensible. A punctuation string is added to a <code>Lex</code> 109 object by calling the <code>addPunctuation</code> method. A boolean 110 given to the <code>Lex</code> constructor indicates whether a 111 newly-constructed <code>Lex</code> object should automatically have 112 all Java punctuation strings added to it. 113 114 115 <h3> Extensibility: comments and pragmas </h3> 116 117 <p> The handling of comments is special in two ways: the punctuation 118 strings that start comments is extensible, the text of comments can be 119 parsed for pragmas. 120 121 <h5> Comment recognition </h5> 122 123 <p> Ordinarily, keywords and punctuation strings are mapped to token 124 codes that are returned by calls to <code>getNextToken</code>. 125 However, two token codes are treated specially: 126 <code>TagConstants.C_COMMENT</code> and <code>TagConstants.EOL_COMMENT</code>. 127 These codes are used to indicate the start of C-like comments 128 (<code>/*...*</code><code>/</code>) and end-of-line comments 129 (<code>//...</code>), respectively. When a keyword or punctuation 130 string is mapped to these codes, it is handled like a comment 131 initiator rather than a regular token. 132 133 <p> For all newly-created <code>Lex</code> objects, <code>/*</code> is 134 mapped to <code>TagConstants.C_COMMENT</code> and <code>//</code> is mapped 135 to <code>TagConstants.EOL_COMMENT</code>. Other punctuation strings can be 136 made comment initiators by mapping them to comment-initiating codes. 137 This is more useful for <code>TagConstants.EOL_COMMENT</code> than for 138 <code>TagConstants.C_COMMENT</code>, since the string 139 <code>*</code><code>/</code> is hard-wired as the terminator of C-like 140 comments. 141 142 <h5> Pragma parsing </h5> 143 144 <code>Lex</code> objects are designed to support annotation of Java 145 programs through pragmas. A <a href = "../pragma-handling.html"> 146 separate document </a> describes our overall aproach to pragmas. In 147 brief, our front-end supports two kinds of pragmas: control pragmas 148 that can appear anywhere in an input file and are collected in a list 149 apart from the parse tree, and syntax pragmas that can only appear in 150 certain grammatical contexts and become part of the parse tree. 151 Pragmas always appear in Java comments, at most one pragma per 152 comment. These comments must have one of the following forms: 153 154 <menu> 155 <li> <code>/*</code><i>tag</i> <i>white-space</i> <i>pragma-text</i><code>*</code><code>/</code> 156 <li> <code>//</code><i>tag</i> <i>white-space-minus-EOL</i> <i>pragma-text</i> <i>EOL</code> 157 </menu> 158 159 <p> When a <code>Lex</code> object is created, it can optionally be 160 associated with a <code>PragmaParser</code> object. If a 161 <code>Lex</code> object has no <code>PragmaParser</code>, it discards 162 all comments. Otherwise, the <code>Lex</code> object passes the first 163 character of the comment (or -1 if the comment is empty) to the 164 <code>checkTag</code> method of the <code>PragmaParser</code> object, 165 which returns <code>false</code> if the comment definitely does not 166 contain any pragmas. If the comment may contain pragmas, the 167 <code>Lex</code> object bundles the text between the delimiters of a 168 comment into a <code>CorrelatedReader</code> which it passes to the 169 <code>restart</code> method of its <code>PragmaParser</code>. (This 170 text excludes both the opening <code>/*</code> or <code>//</code> and 171 the closing <code>*</code><code>/</code> or line terminator.) 172 173 <p> The <code>Lex</code> object then calls <code>getNextPragma</code> 174 to read the pragmas out of the comment one at a time. The 175 <code>Lex</code> object does this in a lazy manner; that is, it reads 176 a pragma, returns it to the parser, and waits until the parser calls 177 for the next token before it attempts to read another pragma. The 178 <code>getNextPragma</code> method returns a boolean, returning 179 <code>false</code> if there are no more pragmas to be parsed. The 180 <code>getNextPragma</code> method takes a <code>Token</code> as an 181 argument, storing information about the pragma parsed into this 182 argument. 183 184 <p> When <code>PragmaParser.getNextPragma</code> returns a 185 <code>LexicalPragma</code>, the <code>Lex</code> object puts it in an 186 internal list rather than returning it to the parser. The list of 187 collected lexical pragmas can be retrieved by calling 188 <code>getLexicalPragmas</code>. 189 190 @see javafe.util.CorrelatedReader 191 @see javafe.parser.TagConstants 192 @see javafe.parser.Token 193 @see javafe.parser.PragmaParser 194 195 */ 196 197 public class Lex extends Token 198 { 199 //// Underlying input stream 200 201 /** 202 * Current state of input stream underlying the scanner minus the 203 * first character. See <code>m_nextchr</code>. <p> 204 * 205 * This is null iff we are closed. 206 */ 207 208 /*@ spec_public @*/ protected CorrelatedReader m_in = null; 209 210 /** Each call to <code>getNextToken</code> reads ahead one character 211 and leaves the result in <code>m_nextchr</code>. In other words, 212 between calls to <code>getNextToken</code>, the stream of 213 characters yet-to-scanned consists of the character in 214 <code>m_nextchr</code> followed by the characters remaining in 215 <code>m_in</code>. */ 216 217 protected int m_nextchr; 218 219 220 //// Buffer used to hold the actual char's making up a token. 221 222 // The exact characters making up a token are copied into this array 223 // for all tokens. Although in some cases this isn't needed (e.g., 224 // for string literals), we do it anyway in case someday we want to 225 // export that ability of getting a token's exact text rather than 226 // its interpretation, esp. in the case of literals. 227 228 /** The characters that constitute the current token. Only the 229 first <TT>textlen</TT> characters are part of the current token; 230 the actual length of <TT>text</TT> may be bigger. The lexer may 231 occasionally need to resize <TT>text</TT>, so the same array might 232 not be used throughout the lifetime of the lexer. */ 233 234 //@ invariant text != null; 235 //@ invariant text.length > 4; 236 protected char[] text = new char[64]; // Invariant: text.length > 4 237 // 64 should be large enough that overflows won't happen for most inputs 238 239 /** The number of characters in the current token. The "current 240 token" is the one parsed by the previous call to 241 <TT>getNextToken</TT> (there is no "current token" between 242 creation of a lexer and the first call to getNextToken). */ 243 244 //@ invariant 0 <= textlen && textlen <= text.length; 245 protected int textlen = 0; 246 247 /** Append 'c' to <CODE>text</CODE>, expanding if necessary. */ 248 //@ modifies text, textlen; 249 //@ ensures textlen == \old(textlen)+1; 250 protected void append(int c) { 251 try { 252 text[textlen] = (char)c; //@ nowarn IndexTooBig; // caught 253 } catch (ArrayIndexOutOfBoundsException e) { 254 //@ assume textlen>=text.length; 255 char[] newtext = new char[textlen + 128]; 256 System.arraycopy(text, 0, newtext, 0, textlen); 257 text = newtext; 258 text[textlen] = (char)c; 259 } 260 textlen++; 261 } 262 263 264 //// Instance variables for token queue and pragma parsing 265 266 protected final TokenQueue lookaheadq = new TokenQueue(); 267 268 public /*@ non_null @*/ LexicalPragmaVec lexicalPragmas; 269 270 //@ invariant inPragma ==> pragmaParser != null; 271 protected PragmaParser pragmaParser; 272 protected boolean inPragma; 273 274 275 //// Constructors 276 277 /** 278 * Creates a lexical analyzer that will tokenize the characters 279 * read from an underlying <code>CorrelatedReader</code>. Before 280 * the newly-created scanner can be used, its <code>restart</code> 281 * method must be called on a <code>CorrelatedReader</code>. The 282 * <code>pragmaParser</code> object is used to parse pragmas out 283 * of comments; if it is <code>null</code>, all comments are 284 * discarded. The <code>isJava</code> flag controls the initial 285 * set of keywords and punctuation strings; if <code>true</code>, 286 * the new scanner will recognize Java's keywords and punctuation 287 * strings, if <code>false</code>, the new scanner will recognize 288 * <em>no</em> keywords or punctuation strings. If 289 * <code>isJava</code> is true, the token codes used for the 290 * Java's keywords and punctuation strings are those defined by 291 * the <code>TagConstants</code> class. 292 */ 293 294 //@ ensures m_in==null; 295 public Lex(PragmaParser pragmaParser, boolean isJava) { 296 this.pragmaParser = pragmaParser; 297 lexicalPragmas = LexicalPragmaVec.make(); 298 299 inPragma = false; 300 if (isJava) { 301 addJavaKeywords(); 302 addJavaPunctuation(); 303 } else { 304 addPunctuation("/*", TagConstants.C_COMMENT); 305 addPunctuation("//", TagConstants.EOL_COMMENT); 306 } 307 } 308 309 310 /** Start scaning a new <code>CorrelatedReader</code>. First closes 311 the old <code>CorrelatedReader</code> associated with 312 <code>this</code> (if there was one), and clears out the set of 313 collected lexical pragms. In addition to (re)-seting the 314 underlying input stream, this method scans the first token, 315 returning the token kind of the result and setting the 316 <code>Token</code> fields of <code>this</code>. If a 317 <code>CorrelatedReader</code> is already underlying 318 <code>this</code>, it is closed before the new reader is 319 installed. Note: The argument <code>in</code> is "captured" in 320 the internal, private state of the resulting scanner and should 321 not be used by other parts of the program. */ 322 //@ public normal_behavior 323 //@ modifies m_in, ttype, auxVal, identifierVal, lexicalPragmas; 324 //@ ensures m_in != null; 325 //@ also 326 //@ protected normal_behavior 327 //@ modifies m_nextchr; 328 public int restart(/*@ non_null @*/ CorrelatedReader in) { 329 close(); 330 try { 331 m_in = in; 332 m_nextchr = m_in.read(); 333 } catch (IOException e) { 334 ErrorSet.fatal(m_in.getLocation(), e.toString()); 335 return TagConstants.NULL; // Dummy 336 } 337 return getNextToken(); 338 } 339 340 341 /** Closes the <code>CorrelatedReader</code> underlying 342 <code>this</code>, clears the set of collected lexical pragmas, 343 and in other ways frees up resources associated with 344 <code>this</code>. After being closed, a <code>Lex</code> object 345 can be restarted by calling <code>restart</code>. (An IO 346 exception raised by closing the underlying input stream will be 347 converted into a <code>javafe.util.FatalError</code> runtime 348 exception.) */ 349 //@ public normal_behavior 350 //@ modifies m_in, lexicalPragmas; 351 //@ ensures m_in==null; 352 //@ also 353 //@ protected normal_behavior 354 //@ modifies inPragma; 355 public void close() { 356 if (m_in != null) { 357 m_in.close(); 358 } 359 m_in = null; 360 lookaheadq.clear(); 361 if (pragmaParser != null) { 362 pragmaParser.close(); 363 lexicalPragmas = LexicalPragmaVec.make(); 364 } 365 inPragma = false; 366 } 367 368 369 370 //// Public members for scanning 371 372 public void replaceLookaheadToken(int k, Token t) { 373 lookaheadq.setElementAt(k,t); 374 } 375 376 /** Scans next token from input stream. Returns the code of the 377 next token in the token stream and fills in the <code>Token</code> 378 fields of <CODE>this</CODE>. Note that the 379 <code>startingLoc</code> and <code>endingLoc</code> fields of 380 <code>this</code> are not accurate for the end-of-file token. */ 381 382 //@ public normal_behavior 383 //@ requires m_in != null; 384 //@ modifies ttype, auxVal, identifierVal; 385 //@ also 386 //@ protected normal_behavior 387 //@ modifies inPragma, m_nextchr; 388 public int getNextToken() { 389 if (lookaheadq.notempty) { 390 lookaheadq.dequeue(this); 391 return ttype; 392 } 393 394 for(;;) { 395 if (inPragma) 396 if (! pragmaParser.getNextPragma(this)) { 397 inPragma = false; 398 } else if (ttype != TagConstants.LEXICALPRAGMA) { 399 return ttype; 400 } else { 401 lexicalPragmas.addElement((LexicalPragma)this.auxVal); 402 continue; 403 } 404 405 int t = scanToken(); 406 if (t != TagConstants.C_COMMENT && 407 t != TagConstants.EOL_COMMENT) { 408 return t; 409 } 410 else { 411 scanComment(t); 412 } 413 } 414 } 415 416 /** Returns token type of the <TT>k</TT>th future token, where k=0 417 is the current token. If <code>k</code> is past the end of the 418 token stream, <code>TagConstants.EOF</code> is returned. */ 419 420 //@ requires k>=0; 421 //@ requires m_in != null; 422 public int lookahead(int k) { 423 if (k == 0) return ttype; 424 int lookahead_count = lookaheadq.size(); 425 426 if (lookahead_count < k) { // Need to add more tokens in lookahead buffer 427 this.copyInto(savedState); 428 try { 429 for(int deficit = k - lookahead_count; 0 < deficit; deficit--) { 430 for(;;) { 431 if (inPragma) 432 if (! pragmaParser.getNextPragma(this)) 433 inPragma = false; 434 else if (ttype != TagConstants.LEXICALPRAGMA) 435 break; 436 else { 437 lexicalPragmas.addElement((LexicalPragma)this.auxVal); 438 continue; 439 } 440 441 int t = scanToken(); 442 if (t != TagConstants.C_COMMENT 443 && t != TagConstants.EOL_COMMENT) 444 break; 445 else scanComment(t); 446 } 447 lookaheadq.enqueue(this); 448 } 449 } finally { 450 savedState.copyInto(this); 451 savedState.clear(); 452 } 453 } 454 return lookaheadq.elementAt(k-1).ttype; 455 } 456 //@ invariant savedState != null; 457 protected Token savedState = new Token(); 458 459 public Token lookaheadToken(int k) { 460 if (k==0) return this; 461 lookahead(k); 462 return lookaheadq.elementAt(k-1); 463 } 464 465 /** Returns the set of lexical pragmas collected. It also clears 466 the set of lexical pragmas so that the next call will not include 467 them. (If this lexer has no <code>PragmaParser</code>, then an 468 empty vector is returned.) */ 469 470 //@ ensures \result != null; 471 public LexicalPragmaVec getLexicalPragmas() { 472 return lexicalPragmas.copy(); 473 } 474 475 /** 476 * Remove the first LexicalPragma from our set of lexical pragmas 477 * collected, returning it or null if our set is empty. 478 */ 479 public LexicalPragma popLexicalPragma() { 480 if (lexicalPragmas.size()>0) 481 return lexicalPragmas.pop(); 482 483 return null; 484 } 485 486 487 //// Internal scanning routines 488 489 /** Returns the code of the next token in the token stream, updating 490 the <code>Token</code> fields of <code>this</code> along the way. 491 Advances underlying stream to the character just past the last 492 character of the token returned, and changes the internal buffer 493 used by <code>getTokenText</code> to contain the text of this 494 token. 495 496 <p> In most cases, this method leaves <code>m_nextchr</code> 497 holding the character just after the token scanned and 498 <code>m_in</code> pointing to the character after that. However, 499 if <code>TagConstants.C_COMMENT</code> or 500 <code>TagConstants.EOL_COMMENT</code> is returned, it leaves 501 <code>m_in</code> pointing to the character just after the token 502 scanned and <code>m_nextchr</code> undefined. This aids in pragma 503 processing. */ 504 505 //@ requires m_in != null; 506 private int scanToken() { 507 // The following "assert" is obvious from the program text, because 508 // the two arrays are non-null and their types do not coincide in any 509 // other way. ESC/Java can figure this out, too, so therefore the 510 // following "assert" produces no warning. This property is also 511 // cruicial to figuring out that the updates to "text[...]" in this 512 // body do not destroy the invariants declared in TagConstants about 513 // "punctuationCodes[...]". However, oddly enough, without being 514 // prompted to first prove this property, ESC/Java isn't able to 515 // prove the necessary thing about the updates to "text[...]". 516 //@ assert (Object)text != (Object)TagConstants.punctuationCodes; 517 518 try { 519 int nextchr = m_nextchr; 520 textlen = 0; 521 522 while (Character.isWhitespace((char)nextchr)) { 523 m_in.mark(); 524 nextchr = m_in.read(); 525 } 526 startingLoc = m_in.getLocation(); 527 528 // Inline the identifier-handling code because it's the common case 529 if (Character.isJavaIdentifierStart((char)nextchr)) { 530 int h = 0; 531 do { 532 try { 533 text[textlen] = (char)nextchr; 534 textlen++; 535 } catch (ArrayIndexOutOfBoundsException e) { this.append(nextchr);} 536 h = _SpecialParserInterface.HC*h + nextchr; 537 nextchr = m_in.read(); 538 } while (Character.isJavaIdentifierPart((char)nextchr)); 539 m_nextchr = nextchr; 540 auxVal = null; 541 identifierVal = _SpecialParserInterface.intern(text, textlen, h); 542 if (onlyjavakeywords) { 543 ttype = _SpecialParserInterface.getTokenType(identifierVal); 544 } else if (keywords != null) { 545 Object val = keywords.get(identifierVal); 546 if (val != null) { 547 ttype = ((Integer)val).intValue(); 548 // From the Unenforceable invariant on keyword: 549 /*@ assume ttype != TagConstants.BOOLEANLIT && 550 ttype != TagConstants.INTLIT && 551 ttype != TagConstants.LONGLIT && 552 ttype != TagConstants.FLOATLIT && 553 ttype != TagConstants.DOUBLELIT && 554 ttype != TagConstants.STRINGLIT && 555 ttype != TagConstants.CHARLIT && 556 ttype != TagConstants.LEXICALPRAGMA && 557 ttype != TagConstants.MODIFIERPRAGMA && 558 ttype != TagConstants.STMTPRAGMA && 559 ttype != TagConstants.TYPEDECLELEMPRAGMA && 560 ttype != TagConstants.TYPEMODIFIERPRAGMA; */ 561 } else if (javakeywords) 562 ttype = _SpecialParserInterface.getTokenType(identifierVal); 563 else ttype = TagConstants.IDENT; 564 } else ttype = TagConstants.IDENT; 565 return ttype; 566 } 567 ttype = Token.CLEAR; // keep Token invariants happy 568 identifierVal = null; 569 570 if (Character.isDigit((char)nextchr)) 571 return scanNumber(nextchr); 572 if (nextchr == '\'' || nextchr == '\"') 573 return scanCharOrString(nextchr); 574 575 scanJavaExtensions(nextchr); 576 if (ttype != TagConstants.NULL) return ttype; 577 578 scanPunctuation(nextchr); 579 if (ttype != TagConstants.NULL) return ttype; 580 581 if (nextchr == -1) { 582 m_nextchr = nextchr; 583 return ttype = TagConstants.EOF; 584 } 585 586 String s = PrettyPrint.toCanonicalString(TagConstants.CHARLIT, 587 new Integer(nextchr)); 588 ErrorSet.fatal(m_in.getLocation(), "Unexpected character " + s); 589 590 //@ unreachable; 591 return ttype; 592 593 } catch (IOException e) { 594 ErrorSet.fatal(m_in.getLocation(), e.toString()); 595 return TagConstants.NULL; // Dummy 596 } 597 } 598 599 600 /** Handle a comment. m_in points to the character just after the 601 "//" or "/*". The mark is set at the last character read. */ 602 603 //@ requires m_in != null; 604 //@ requires !inPragma; 605 //@ modifies inPragma, m_nextchr; 606 private void scanComment(int commentKind) { 607 try { 608 m_in.mark(); 609 int firstchr = m_in.read(); 610 // System.out.println("scanComment: firstchr = '"+(char)firstchr+"'"); 611 612 /* Decide if this comment contains a pragma. */ 613 614 if (pragmaParser != null) { 615 if (pragmaParser.checkTag(firstchr)) { 616 Assert.notFalse(! inPragma); 617 inPragma = true; 618 } 619 } 620 621 // Scan the comment 622 boolean nonEmptyComment = false; // For empty multi-line comment 623 624 int locStartComment = m_in.getLocation(); 625 int nextchr = firstchr; 626 if (commentKind == TagConstants.EOL_COMMENT) { 627 nonEmptyComment = true; 628 while (nextchr != -1 && nextchr != '\n') 629 nextchr = m_in.read(); 630 } else { 631 for(;;) { 632 if (nextchr == -1) { 633 ErrorSet.fatal( 634 startingLoc, 635 "Unterminated or improperly nested comment or pragma"); 636 } 637 int oldchr = nextchr; 638 nextchr = m_in.read(); 639 if (oldchr == '*' && nextchr == '/') break; 640 nonEmptyComment = true; 641 } 642 } 643 644 // If the comment contains a pragma, set up the pragma parser 645 if (inPragma) { 646 if(nonEmptyComment) { 647 boolean eolComment = commentKind == TagConstants.EOL_COMMENT; 648 int discard = !eolComment ? 2 : 649 nextchr == '\n' ? 1 : 0; 650 CorrelatedReader nu = m_in.createReaderFromMark(discard); 651 pragmaParser.restart(nu, eolComment); 652 } else { 653 // Is an empty multi-line comment 654 inPragma = false; 655 } 656 } 657 if (!inPragma) { 658 m_in.clearMark(); 659 } 660 661 // Clean up and return 662 m_nextchr = m_in.read(); 663 } catch (IOException e) { 664 m_in.clearMark(); 665 ErrorSet.fatal(m_in.getLocation(), "IO error"); 666 } 667 } //@ nowarn Exception; // ignore Index... from createReaderFromMark 668 669 670 // Notes. The routines below all assume that the input stream to be 671 // scanned consist of their input parameter nextchr plus the characters 672 // in m_in; that is, the nextchr argument is playing the role that 673 // m_nextchr plays between calls to getNextToken. These routines also 674 // assume that textlen = 0 and startingLoc has already been filled in 675 // with the location of the character in nextchr. 676 // Unless otherwise noted, the routines below fill in all fields except 677 // startingLoc, that is, they fill in m_nextchr, text & textlen, 678 // plus the Token fields ttype, endingLoc, and auxVal. 679 // Errors are handled internally by these routines. Either the 680 // error is fatal, or the error is recoverable and recovery is done 681 // by the routines. 682 683 /** Scan a character or string constant. */ 684 685 //@ requires m_in != null; 686 private int scanCharOrString(int nextchr) { 687 try { 688 int endquote = nextchr; 689 this.append(endquote); 690 boolean chr = (endquote == '\''); 691 nextchr = m_in.read(); 692 stringLitLen = 0; 693 for(;;) { 694 if (nextchr != '\\' && nextchr != '\n' && nextchr != -1) { 695 // Normal case 696 this.append(nextchr); 697 if (nextchr == endquote) { nextchr = m_in.read(); break; } 698 this.stringLitAppend(nextchr); 699 nextchr = m_in.read(); 700 continue; 701 } 702 703 if (nextchr == -1 || nextchr == '\n') { 704 if (chr) ErrorSet.fatal(startingLoc, 705 "Unterminated character literal"); 706 else ErrorSet.fatal(startingLoc, "Unterminated string literal"); 707 break; 708 } 709 710 // At this point, we know we're at the start of an escape sequence 711 this.append(nextchr); 712 nextchr = m_in.read(); 713 if ('0' <= nextchr && nextchr <= '7') { // Octal sequence 714 int firstdigit = nextchr - '0'; 715 int result = firstdigit; 716 this.append(nextchr); 717 nextchr = m_in.read(); 718 if ('0' <= nextchr && nextchr <= '7') { 719 this.append(nextchr); result = 8*result + (nextchr - '0'); 720 nextchr = m_in.read(); 721 if (0 <= firstdigit && firstdigit <= 3 722 && '0' <= nextchr && nextchr <= '7') { 723 this.append(nextchr); result = 8*result + (nextchr - '0'); 724 nextchr = m_in.read(); 725 } 726 } 727 if (255 < result) { 728 ErrorSet.error(m_in.getLocation(), 729 "Octal escape sequence overflow"); 730 this.stringLitAppend('\0'); 731 } else this.stringLitAppend(result); 732 } else { 733 if (nextchr == 'b') this.stringLitAppend('\b'); 734 else if (nextchr == 't') this.stringLitAppend('\t'); 735 else if (nextchr == 'n') this.stringLitAppend('\n'); 736 else if (nextchr == 'f') this.stringLitAppend('\f'); 737 else if (nextchr == 'r') this.stringLitAppend('\r'); 738 else if (nextchr == '\"') this.stringLitAppend('\"'); 739 else if (nextchr == '\'') this.stringLitAppend('\''); 740 else if (nextchr == '\\') this.stringLitAppend('\\'); 741 else { 742 ErrorSet.error(startingLoc, "Bad escape sequence"); 743 continue; 744 } 745 this.append(nextchr); 746 nextchr = m_in.read(); 747 } 748 } 749 750 if (chr) { 751 if (1 < stringLitLen) 752 ErrorSet.error(startingLoc, 753 "Character literal with multiple characters."); 754 else if (stringLitLen == 0) 755 if (nextchr == '\'') { 756 this.append(nextchr); 757 nextchr = m_in.read(); 758 stringLit[0] = '\''; 759 ErrorSet.error(startingLoc, "Unquoted \' in character literal."); 760 } else { 761 stringLit[0] = '\0'; 762 ErrorSet.error(startingLoc, "Empty character literal."); 763 } 764 ttype = TagConstants.CHARLIT; 765 auxVal = new Integer(stringLit[0]); // CF 766 } else { 767 ttype = TagConstants.STRINGLIT; 768 auxVal = String.valueOf(stringLit, 0, stringLitLen); 769 } 770 m_nextchr = nextchr; 771 endingLoc = m_in.getLocation(); 772 return ttype; 773 } catch (IOException e) { 774 ErrorSet.fatal(m_in.getLocation(), e.toString()); 775 return TagConstants.NULL; // Dummy 776 } 777 } 778 779 780 /** Scans a numeric literal. Requires <code>nextchr</code> is a 781 decimal digit. Reads a numeric literal into <code>text</code>. 782 Depending on the kind of literal found, will return one of 783 <code>TagConstants.INTLIT</code>, 784 <code>TagConstants.LONGLIT</code>, 785 <code>TagConstants.FLOATLIT</code> or 786 <code>TagConstants.DOUBLELIT</code>. If an error is detected, a 787 message is sent to <code>ErrorSet</code>, <code>m_in</code> is 788 advanced to what appears to be the end of the erroneous token, and 789 a legal literal is left in <code>text</code>. */ 790 791 //@ requires m_in != null; 792 //@ modifies endingLoc, ttype, auxVal, m_nextchr, text, textlen; 793 //@ ensures \result==ttype; 794 /*@ ensures \result==TagConstants.INTLIT || \result==TagConstants.LONGLIT || 795 \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT || 796 \result==TagConstants.MAX_INT_PLUS_ONE || 797 \result==TagConstants.MAX_LONG_PLUS_ONE ; */ 798 private int scanNumber(int nextchr) { 799 try { 800 // Get the first chunk of digits 801 int tentativeResult = 0; 802 while (Character.isDigit((char)nextchr)) { 803 this.append(nextchr); 804 tentativeResult = 10*tentativeResult + (nextchr - '0'); 805 nextchr = m_in.read(); 806 } 807 //@ assume textlen>0; // because know nextchar was initially a digit... 808 809 // Handle floating point literals in another subtroutine 810 if (nextchr == '.') { 811 m_in.mark(); 812 int nextnextchr = m_in.read(); 813 if (nextnextchr == '.') { 814 // Two . in a row - cannot be a double so 815 // back up 816 m_in.reset(); 817 // fall through and let the text so far be handled as 818 // a integer literal 819 } else { 820 this.append(nextchr); 821 return finishFloatingPointLiteral(nextnextchr); 822 } 823 } else if (nextchr == 'e' || nextchr == 'E' 824 || nextchr == 'F' || nextchr == 'f' 825 || nextchr == 'd' || nextchr == 'D') 826 return finishFloatingPointLiteral(nextchr); 827 828 long result = 0; 829 if (text[0] != '0' || (textlen == 1 && nextchr != 'x' && nextchr != 'X')) 830 // Handle base-ten literal 831 if (textlen <= 9) result = tentativeResult; 832 else { 833 // Parse as negative value to avoid problems w/ -Long.MIN_VALUE 834 long mullimit = Long.MIN_VALUE / 10; 835 for(int i = 0; i < textlen; i++) { 836 int d = text[i] - '0'; 837 long r2 = 10*result; 838 if (result < mullimit || r2 < Long.MIN_VALUE + d) { 839 ErrorSet.error(startingLoc, "Integer literal overflow"); 840 result = 0; 841 break; 842 } else result = r2 - d; 843 } 844 if (nextchr == 'L' || nextchr == 'l') { 845 if (result == Long.MIN_VALUE) { 846 this.append(nextchr); 847 m_nextchr = m_in.read(); 848 endingLoc = m_in.getLocation(); 849 auxVal = null; 850 return ttype = TagConstants.MAX_LONG_PLUS_ONE; 851 } 852 } else if (result == Integer.MIN_VALUE) { 853 m_nextchr = nextchr; 854 endingLoc = m_in.getLocation(); 855 auxVal = null; 856 return ttype = TagConstants.MAX_INT_PLUS_ONE; 857 } 858 result = -result; 859 } 860 861 else if (nextchr != 'x' && nextchr != 'X') { 862 // Handle octal literal 863 for(int i = 1; i < textlen; i++) { 864 int d = text[i] - '0'; 865 if (0 <= d && d <= 7) { 866 if ((result >>> 61) != 0) { 867 ErrorSet.error(startingLoc, "Integer literal overflow"); 868 result = 0; 869 break; 870 } 871 result = (result << 3) + d; 872 } else { 873 ErrorSet.error(startingLoc, 874 "Non-octal digit found in octal literal."); 875 result = 0; 876 break; 877 } 878 } 879 880 } else { 881 // Handle hex literal 882 this.append(nextchr); 883 boolean overflow = false; 884 for(nextchr = m_in.read(); ; nextchr = m_in.read()) { 885 int d = Character.digit((char)nextchr, 16); 886 if (d == -1) break; 887 this.append(nextchr); 888 if (! overflow) 889 if ((result >>> 60) != 0) overflow = true; 890 else result = (result << 4) + d; 891 } 892 if (textlen <= 2) 893 ErrorSet.error(startingLoc, "Too few digits in a hex literal."); 894 else if (overflow) { 895 ErrorSet.error(startingLoc, "Integer literal overflow"); 896 result = 0; 897 } 898 } 899 900 if (nextchr == 'L' || nextchr == 'l') { 901 this.append(nextchr); 902 m_nextchr = m_in.read(); 903 endingLoc = m_in.getLocation(); 904 auxVal = new Long(result); 905 return ttype = TagConstants.LONGLIT; 906 } 907 if ((result & 0xffffffff00000000L) != 0) { 908 ErrorSet.error(startingLoc, "Integer literal overflow"); 909 result = 0; 910 } 911 m_nextchr = nextchr; 912 endingLoc = m_in.getLocation(); 913 auxVal = new Integer((int)result); 914 return ttype = TagConstants.INTLIT; 915 } catch (IOException e) { 916 ErrorSet.fatal(m_in.getLocation(), e.toString()); 917 return TagConstants.NULL; // Dummy 918 } 919 } 920 921 /** Finishes scanning a floating-point literal. 922 923 <p> Requires: <code>text</code> contains a possibly empty sequence 924 of decimal digits followed by an optional <code>'.'</code>; also, 925 <code>text</code> cannot be empty. Further, let <i>s</i> be the 926 sequence of characters consisting of the characters in 927 <code>text</code> followed by <code>nextchr</code> followed by the 928 characters in <code>m_in</code>. This routine requires that a 929 prefix of <i>s</i> match the syntax of floating-point literals as 930 defined by the Java language specification. 931 932 <p> Ensures: Scans the floating-point literal in <i>s</i>. 933 Depending on the type of the literal, returns 934 <code>TagConstants.FLOATLIT</code> or 935 <code>TagConstants.DOUBLELIT</code> and sets sets 936 <code>auxVal</code> to either a <code>Float</code> or 937 <code>Double</code>. If an error is encountered, a message is 938 sent to <code>ErorrSet</code> and recovery is performed. */ 939 940 //@ requires m_in != null; 941 //@ requires textlen>0; 942 //@ modifies ttype, auxVal, m_nextchr, endingLoc, text, textlen; 943 //@ ensures \result==ttype; 944 //@ ensures \result==TagConstants.FLOATLIT || \result==TagConstants.DOUBLELIT; 945 private int finishFloatingPointLiteral(int nextchr) { 946 try { 947 boolean error = false; 948 boolean zeroMantissa = true; // Used for underflow detection 949 950 // First, see if there are any non-zero digits in the non-fractional 951 // part of the mantissa 952 for(int i = 0, len = textlen - (text[textlen-1] == '.' ? 1 : 0); 953 i < len; i++) 954 if (text[i] != '0') { zeroMantissa = false; break; } 955 956 // If there's a fractional part, scan it in 957 while (Character.isDigit((char)nextchr)) { 958 if (nextchr != '0') zeroMantissa = false; 959 this.append(nextchr); 960 nextchr = m_in.read(); // throws IOException 961 } 962 963 // If there's an exponent, scan it in 964 if (nextchr == 'e' || nextchr == 'E') { 965 this.append(nextchr); 966 nextchr = m_in.read(); 967 if (nextchr == '+' || nextchr == '-') { 968 this.append(nextchr); 969 nextchr = m_in.read(); 970 } 971 while (Character.isDigit((char)nextchr)) { 972 this.append(nextchr); 973 nextchr = m_in.read(); 974 } 975 if (! Character.isDigit(text[textlen-1])) { 976 ErrorSet.error(startingLoc, ("Digits required in exponent part" 977 + " of floating-point literal.")); 978 error = true; 979 } 980 } 981 982 String s = (error ? "1.0" : String.valueOf(this.text, 0, textlen)); 983 984 // Assert: s has one of the following forms: 985 // [0-9]+ (. [0-9]*)? (e [+-] [0-9]+)? 986 // . [0-9]+ (e [+-] [0-9]+)? 987 // Also, zeroMantissa is true iff all digits in the mantissa are zero 988 // (Todo: examine code more closely to see if this is really true.) 989 990 boolean f = (nextchr == 'F' || nextchr == 'f'); 991 if (f || nextchr == 'D' || nextchr == 'd') { 992 this.append(nextchr); 993 m_nextchr = m_in.read(); 994 } else m_nextchr = nextchr; 995 endingLoc = m_in.getLocation(); 996 997 if (f) { 998 Float result = Float.valueOf(s); // !throw NumberFormatException 999 auxVal = result; 1000 if (result.isInfinite()) 1001 ErrorSet.error(startingLoc, "Floating-point literal overflow"); 1002 else if (! zeroMantissa && result.floatValue() == 0.0) 1003 ErrorSet.error(startingLoc, "Floating-point literal underflow"); 1004 } else { 1005 Double result = Double.valueOf(s); // !throw NumberFormatException 1006 auxVal = result; 1007 if (result.isInfinite()) 1008 ErrorSet.error(startingLoc, "Floating-point literal overflow"); 1009 else if (! zeroMantissa && result.doubleValue() == 0.0D) 1010 ErrorSet.error(startingLoc, "Floating-point literal underflow"); 1011 } 1012 return ttype = (f ? TagConstants.FLOATLIT : TagConstants.DOUBLELIT); 1013 } catch (IOException e) { 1014 ErrorSet.fatal(m_in.getLocation(), e.toString()); 1015 return TagConstants.NULL; // Dummy 1016 } 1017 } //@ nowarn Exception; // NumberFormatException won't be thrown 1018 1019 /** Scans a punctuation string <em>or</em> a floating-point 1020 number. If input doesn't match either a floating-point number or 1021 any punctuation, returns <code>TagConstants.NULL</code>. Assumes 1022 <code>startingLoc</code> already filled in. 1023 1024 <p>The routine may change the mark arbitrarily. 1025 1026 <p> This method leaves <code>m_in</code> in a different state than 1027 the previous ones do. Ordinarily, <code>scanXXX</code> routines 1028 return with <code>m_nextchr</code> holding the character just 1029 after the token scanned and <code>m_in</code> pointing to the 1030 character after that. <code>scanPunctuation</code> does too, but 1031 only when the value returned is <em>not</em> 1032 <code>TagConstants.C_COMMENT</code> or 1033 <code>TagConstants.EOL_COMMENT</code>; in those two cases, it 1034 returns with <code>m_in</code> pointing to the character just 1035 after the token scanned and <code>m_nextchr</code> undefined. 1036 This aids in pragma processing. Also, if <code>TagConstants.NULL</code> 1037 is returned, then <code>m_nextchr</code> is undefined and 1038 <code>m_in</code> is where it was on entry. 1039 */ 1040 1041 //@ requires m_in != null; 1042 //@ modifies ttype, auxVal, m_nextchr, textlen, endingLoc, text; 1043 //@ modifies m_in.marked; 1044 //@ ensures \result==ttype; 1045 private int scanPunctuation(int nextchr) { 1046 try { 1047 boolean possibleFloatingPointNumber = (nextchr == '.'); 1048 text[0] = (char)nextchr; 1049 textlen = 1; 1050 PunctuationPrefixTree prefix = punctuationTable; 1051 PunctuationPrefixTree lastPunctuation = prefix; 1052 int lastPunctuationLength = 0; 1053 m_in.mark(); 1054 1055 // The following loop, which has been unrolled once so the 1056 // first iteration can be handled specially, has the invariant: 1057 // prefix == null || depth(prefix) == textlen-1 1058 // nextchr == text[textlen-1] 1059 // (nextchr_0 + m_in_0) == (text[0,textlen) + m_in) 1060 // lastPunctuation != null && lastPunctuation is last valid punct seen 1061 // lastPunctuationLength == depth(lastPunctuation) 1062 // (nextchr_0+m_in_0)==(text[0,lastPunctuationLength]+m_in.resetedtext) 1063 // Stating part of this invariant graphically: 1064 // l t l = lastPunctuationLength; t = textlen 1065 // text: abcdefghij 1066 // nextchr_0+ 1067 // m_in_0: abcdefghijklmnop 1068 // r nm r = resetted text, n = nextchr, m = m_in 1069 1070 int index = nextchr - '!'; 1071 if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index) 1072 prefix = null; 1073 else 1074 prefix = prefix.children[nextchr - '!']; 1075 if (prefix != null && prefix.code != TagConstants.NULL) { 1076 lastPunctuation = prefix; 1077 lastPunctuationLength = textlen; 1078 m_in.mark(); 1079 } 1080 nextchr = m_in.read(); 1081 if (possibleFloatingPointNumber && Character.isDigit((char)nextchr)) { 1082 // Deal with special case of floating-point numbers 1083 m_in.clearMark(); 1084 return finishFloatingPointLiteral(nextchr); 1085 } 1086 this.append(nextchr); 1087 while (prefix != null) { 1088 index = nextchr - '!'; 1089 if (index < 0 || PunctuationPrefixTree.CHILDLEN <= index) 1090 prefix = null; 1091 else 1092 prefix = prefix.children[nextchr - '!']; 1093 if (prefix != null && prefix.code != TagConstants.NULL) { 1094 lastPunctuation = prefix; 1095 lastPunctuationLength = textlen; 1096 m_in.mark(); 1097 } 1098 nextchr = m_in.read(); 1099 this.append(nextchr); 1100 } 1101 // End of unrolled loop 1102 1103 m_in.reset(); 1104 textlen = lastPunctuationLength; 1105 endingLoc = m_in.getLocation(); 1106 ttype = lastPunctuation.code; 1107 if (ttype != TagConstants.C_COMMENT && 1108 ttype != TagConstants.EOL_COMMENT && 1109 ttype != TagConstants.NULL) { 1110 m_nextchr = m_in.read(); 1111 } 1112 return ttype; 1113 } catch (IOException e) { 1114 ErrorSet.fatal(m_in.getLocation(), e.toString()); 1115 return TagConstants.NULL; // Dummy 1116 } 1117 } 1118 1119 /** Scans a Java extension. If input doesn't match any Java extension, 1120 returns <code>TagConstants.NULL</code>. Assumes <code>startingLoc</code> 1121 already filled in, and assumes <code>textlen</code> is 0. 1122 1123 <p>The routine may change the mark arbitrarily. 1124 1125 <p> If a Java extension is matched, returns with <code>m_nextchr</code> 1126 holding the character just after the token scanned and <code>m_in</code> 1127 pointing to the character after that. 1128 */ 1129 //@ requires m_in != null; 1130 //@ requires textlen == 0; 1131 //@ modifies ttype, auxVal, m_nextchr; 1132 //@ modifies m_in.marked; 1133 //@ ensures \result==ttype; 1134 protected int scanJavaExtensions(int nextchr) { 1135 ttype = TagConstants.NULL; 1136 return ttype; 1137 } 1138 1139 //// Internal char buffer used by string scanner. 1140 1141 // Having this separate from text allows us to accumulate both the 1142 // original input in text and the escape-converted input in stringLit 1143 1144 //@ private invariant stringLit != null; 1145 //@ private invariant stringLit.length>=64; 1146 private char[] stringLit = new char[64]; 1147 1148 //@ private invariant stringLitLen>=0; 1149 private int stringLitLen = 0; 1150 1151 private void stringLitAppend(int c) { 1152 try { 1153 stringLit[stringLitLen] = (char)c; //@ nowarn IndexTooBig; // caught 1154 } catch (ArrayIndexOutOfBoundsException e) { 1155 char[] newstringLit = new char[stringLitLen + 128]; 1156 System.arraycopy(stringLit, 0, newstringLit, 0, stringLitLen); 1157 stringLit = newstringLit; 1158 stringLit[stringLitLen] = (char)c; 1159 } 1160 stringLitLen++; 1161 } 1162 1163 1164 //// The keyword and punctuation tables, plus methods for changing them 1165 1166 //@ private invariant punctuationTable != null; 1167 private PunctuationPrefixTree punctuationTable = new PunctuationPrefixTree(); 1168 1169 /** 1170 * Unenforceable invariant: all tokenTypes in this table do not 1171 * require a non-null auxVal. (cf. Token.auxVal). 1172 */ 1173 // Old specs from original full JML spec files. Must be 1174 // rewritten for current java.util.Hashtable specs. 1175 /* invariant (keywords != null) ==> (keywords.keyType == \type(Identifier) && 1176 keywords.elementType == \type(Integer)); */ 1177 protected Hashtable keywords = null; 1178 1179 protected boolean javakeywords = false, onlyjavakeywords = false; 1180 //@ invariant onlyjavakeywords == (javakeywords && keywords == null); 1181 1182 static { 1183 // When class initializes, change the <CODE>tokenType</CODE> field 1184 // of <CODE>Identifier</CODE> instances associated with keywords. 1185 for(int code = TagConstants.FIRST_KEYWORD; 1186 code <= TagConstants.LAST_KEYWORD; 1187 code++) { 1188 Identifier idn = Identifier.intern(TagConstants.toString(code)); 1189 _SpecialParserInterface.setTokenType(idn, code); 1190 } 1191 } 1192 1193 /** Add all of Java's keywords to the scanner. The token codes used 1194 for these keywords are those defined by the <code>TagConstants</code> 1195 class. Requires that none of these keywords have been added 1196 already. */ 1197 1198 public void addJavaKeywords() { 1199 javakeywords = true; 1200 if (keywords != null) 1201 // New values override existing ones... 1202 for(int i = TagConstants.FIRST_KEYWORD; 1203 i <= TagConstants.LAST_KEYWORD; 1204 i++) 1205 keywords.remove(Identifier.intern(TagConstants.toString(i))); 1206 else onlyjavakeywords = true; 1207 } 1208 1209 1210 /** 1211 * Add a keyword to a <code>Lex</code> object with the given code. 1212 * Requires that <code>newkeyword</code> is a Java identifier and 1213 * that <code>code</code> is not <code>TagConstants.NULL</code> or 1214 * a tokenType that requires auxVal to be non-null; 1215 * (cf. Token.auxVal). 1216 * 1217 * Also requires that the keyword hasn't already been added. 1218 */ 1219 //@ requires code != TagConstants.NULL; 1220 //@ requires code != TagConstants.BOOLEANLIT; 1221 //@ requires code != TagConstants.INTLIT; 1222 //@ requires code != TagConstants.LONGLIT; 1223 //@ requires code != TagConstants.DOUBLELIT; 1224 //@ requires code != TagConstants.STRINGLIT; 1225 //@ requires code != TagConstants.CHARLIT; 1226 //@ requires code != TagConstants.LEXICALPRAGMA; 1227 //@ requires code != TagConstants.MODIFIERPRAGMA; 1228 //@ requires code != TagConstants.STMTPRAGMA; 1229 //@ requires code != TagConstants.TYPEDECLELEMPRAGMA; 1230 //@ requires code != TagConstants.TYPEMODIFIERPRAGMA; 1231 public void addKeyword(/*@ non_null @*/ String newkeyword, int code) { 1232 Assert.precondition(code != TagConstants.NULL); 1233 if (keywords == null) { 1234 keywords = new Hashtable(); 1235 // Old specs from original full JML spec files. Must be 1236 // rewritten for current java.util.Hashtable specs. 1237 // set keywords.keyType = \type(Identifier); 1238 // set keywords.elementType = \type(Integer); 1239 } 1240 keywords.put(Identifier.intern(newkeyword), new Integer(code)); 1241 onlyjavakeywords = false; 1242 } 1243 1244 1245 1246 /** Add all of Java's punctuation strings to the scanner. The codes 1247 used for these punctuation strings are the found in the 1248 <code>TagConstants</code> class. Requires that none of these 1249 punctuation strings have been added before. */ 1250 1251 public void addJavaPunctuation() { 1252 Assert.notFalse(TagConstants.punctuationStrings.length == 1253 TagConstants.punctuationCodes.length); 1254 for(int i = 0; i < TagConstants.punctuationStrings.length; i++) 1255 addPunctuation(TagConstants.punctuationStrings[i], 1256 TagConstants.punctuationCodes[i]); 1257 } 1258 1259 /** Add a punctuation string to a scanner associated with a given 1260 code. Requires that the characters in the punctuation string are 1261 all punctuation characters, that is, non-alphanumeric ASCII 1262 characters whose codes are between 33 ('!') and 126 ('~') 1263 inclusive. Also requires that the code is not 1264 <code>TagConstants.NULL</code> and that the punctuation string 1265 hasn't already been added. */ 1266 //@ requires punctuation != null; 1267 //@ requires code != TagConstants.NULL; 1268 //@ requires code != TagConstants.IDENT; 1269 //@ requires code != TagConstants.BOOLEANLIT; 1270 //@ requires code != TagConstants.INTLIT; 1271 //@ requires code != TagConstants.LONGLIT; 1272 //@ requires code != TagConstants.FLOATLIT; 1273 //@ requires code != TagConstants.DOUBLELIT; 1274 //@ requires code != TagConstants.STRINGLIT; 1275 //@ requires code != TagConstants.CHARLIT; 1276 //@ requires code != TagConstants.LEXICALPRAGMA; 1277 //@ requires code != TagConstants.MODIFIERPRAGMA; 1278 //@ requires code != TagConstants.STMTPRAGMA; 1279 //@ requires code != TagConstants.TYPEDECLELEMPRAGMA; 1280 //@ requires code != TagConstants.TYPEMODIFIERPRAGMA; 1281 public void addPunctuation(String punctuation, int code) { 1282 Assert.precondition(code != TagConstants.NULL); 1283 PunctuationPrefixTree prefix = punctuationTable; 1284 for(int j = 0; j < punctuation.length(); j++) { 1285 int c = punctuation.charAt(j); 1286 Assert.precondition( //@ nowarn Pre; 1287 ('!' <= c && c <= '/') || (':' <= c && c <= '@') 1288 || ('[' <= c && c <= '`') || ('{' <= c && c <= '~')); 1289 int index = c - '!'; 1290 PunctuationPrefixTree child = prefix.children[index]; 1291 if (child == null) 1292 child = prefix.children[index] = new PunctuationPrefixTree(); 1293 prefix = child; 1294 } 1295 prefix.code = code; 1296 } 1297 1298 1299 //// Diagnostic routines 1300 1301 /** Checks invariants (assumes that <CODE>Token</CODE> fields 1302 haven't been mucked with by outside code). <code>prefix</code> is 1303 used to prefix error messages with context provided by the 1304 caller. */ 1305 1306 public void zzz(String prefix) { 1307 super.zzz(); 1308 lookaheadq.zzz("Near character " 1309 + Location.toOffset(startingLoc) 1310 + ": " + prefix); 1311 } 1312 } 1313 1314 class PunctuationPrefixTree { 1315 public static final int CHILDLEN = 1 + '~' - '!'; 1316 1317 /*@ invariant code != TagConstants.IDENT && 1318 code != TagConstants.BOOLEANLIT && 1319 code != TagConstants.INTLIT && 1320 code != TagConstants.LONGLIT && 1321 code != TagConstants.FLOATLIT && 1322 code != TagConstants.DOUBLELIT && 1323 code != TagConstants.STRINGLIT && 1324 code != TagConstants.CHARLIT && 1325 code != TagConstants.LEXICALPRAGMA && 1326 code != TagConstants.MODIFIERPRAGMA && 1327 code != TagConstants.STMTPRAGMA && 1328 code != TagConstants.TYPEDECLELEMPRAGMA && 1329 code != TagConstants.TYPEMODIFIERPRAGMA; */ 1330 public int code = TagConstants.NULL; // ! NULL ==> a punctuation string 1331 1332 //@ invariant children != null; 1333 //@ invariant children.length == CHILDLEN; 1334 //@ invariant \typeof(children) == \type(PunctuationPrefixTree[]); 1335 public PunctuationPrefixTree[] children 1336 = new PunctuationPrefixTree[CHILDLEN]; 1337 }