Monitoring JavaMelody on _ip-10-0-5-101.ec2.internal

1 /*

2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.

3  *

4  * Copyright (c) 1997-2017 Oracle and/or its affiliates. All rights reserved.

5  *

6  * The contents of this file are subject to the terms of either the GNU

7  * General Public License Version 2 only ("GPL") or the Common Development

8  * and Distribution License("CDDL") (collectively, the "License").  You

9  * may not use this file except in compliance with the License.  You can

10  * obtain a copy of the License at

11  * https://oss.oracle.com/licenses/CDDL+GPL-1.1

12  * or LICENSE.txt.  See the License for the specific

13  * language governing permissions and limitations under the License.

14  *

15  * When distributing the software, include this License Header Notice in each

16  * file and include the License file at LICENSE.txt.

17  *

18  * GPL Classpath Exception:

19  * Oracle designates this particular file as subject to the "Classpath"

20  * exception as provided by Oracle in the GPL Version 2 section of the License

21  * file that accompanied this code.

22  *

23  * Modifications:

24  * If applicable, add the following below the License Header, with the fields

25  * enclosed by brackets [] replaced by your own identifying information:

26  * "Portions Copyright [year] [name of copyright owner]"

27  *

28  * Contributor(s):

29  * If you wish your version of this file to be governed by only the CDDL or

30  * only the GPL Version 2, indicate your decision by adding "[Contributor]

31  * elects to include this software in this distribution under the [CDDL or GPL

32  * Version 2] license."  If you don't indicate a single choice of license, a

33  * recipient has the option to distribute your version of this file under

34  * either the CDDL, the GPL Version 2 or to extend the choice of license to

35  * its licensees as provided above.  However, if you add GPL Version 2 code

36  * and therefore, elected the GPL Version 2 license, then the option applies

37  * only if the new code is made subject to such option by the copyright

38  * holder.

39  */

40 

41 package javax.mail.internet;

42 

43 import java.util.*;

44 

45 /**

46  * This class tokenizes RFC822 and MIME headers into the basic

47  * symbols specified by RFC822 and MIME. <p>

48  *

49  * This class handles folded headers (i.e., headers with embedded

50  * CRLF SPACE sequences). The folds are removed in the returned

51  * tokens. 

52  *

53  * @author  John Mani

54  * @author  Bill Shannon

55  */

56 

57 public class HeaderTokenizer {

58 

59     /**

60      * The Token class represents tokens returned by the 

61      * HeaderTokenizer.

62      */

63     public static class Token {

64 

65     private int type;

66     private String value;

67 

68     /**

69      * Token type indicating an ATOM.

70      */

71     public static final int ATOM         = -1;

72 

73     /**

74      * Token type indicating a quoted string. The value 

75      * field contains the string without the quotes.

76       */

77     public static final int QUOTEDSTRING     = -2;

78 

79     /**

80      * Token type indicating a comment. The value field 

81      * contains the comment string without the comment 

82      * start and end symbols.

83      */

84     public static final int COMMENT        = -3;

85 

86     /**

87      * Token type indicating end of input.

88      */

89     public static final int  EOF         = -4;

90 

91     /**

92      * Constructor.

93      * @param    type    Token type

94      * @param    value    Token value

95      */

96     public Token(int type, String value) {

97          this.type = type;

98          this.value = value;

99     }

100 

101     /**

102      * Return the type of the token. If the token represents a

103      * delimiter or a control character, the type is that character

104      * itself, converted to an integer. Otherwise, it's value is 

105      * one of the following:

106      * <ul>

107      * <li><code>ATOM</code> A sequence of ASCII characters 

108      *    delimited by either SPACE, CTL, "(", &lt;"&gt; or the 

109      *    specified SPECIALS

110      * <li><code>QUOTEDSTRING</code> A sequence of ASCII characters

111      *    within quotes

112      * <li><code>COMMENT</code> A sequence of ASCII characters 

113      *    within "(" and ")".

114      * <li><code>EOF</code> End of header

115      * </ul>

116      *

117      * @return    the token type

118      */

119     public int getType() {

120         return type;

121     }

122 

123     /**

124      * Returns the value of the token just read. When the current

125      * token is a quoted string, this field contains the body of the

126      * string, without the quotes. When the current token is a comment,

127      * this field contains the body of the comment.

128      *

129      * @return    token value

130      */

131     public String getValue() {

132         return value;

133     }

134     }

135 

136     private String string; // the string to be tokenized

137     private boolean skipComments; // should comments be skipped ?

138     private String delimiters; // delimiter string

139     private int currentPos; // current parse position

140     private int maxPos; // string length

141     private int nextPos; // track start of next Token for next()

142     private int peekPos; // track start of next Token for peek()

143 

144     /**

145      * RFC822 specials

146      */

147     public final static String RFC822 = "()<>@,;:\\\"\t .[]";

148 

149     /**

150      * MIME specials

151      */

152     public final static String MIME = "()<>@,;:\\\"\t []/?=";

153 

154     // The EOF Token

155     private final static Token EOFToken = new Token(Token.EOF, null);

156 

157     /**

158      * Constructor that takes a rfc822 style header.

159      *

160      * @param    header    The rfc822 header to be tokenized

161      * @param    delimiters      Set of delimiter characters 

162      *                to be used to delimit ATOMS. These

163      *                are usually <code>RFC822</code> or 

164      *                <code>MIME</code>

165      * @param   skipComments  If true, comments are skipped and

166      *                not returned as tokens

167      */

168     public HeaderTokenizer(String header, String delimiters,

169                    boolean skipComments) {

170     string = (header == null) ? "" : header; // paranoia ?!

171     this.skipComments = skipComments;

172     this.delimiters = delimiters;

173     currentPos = nextPos = peekPos = 0;

174     maxPos = string.length();

175     }

176 

177     /**

178      * Constructor. Comments are ignored and not returned as tokens

179      *

180      * @param    header  The header that is tokenized

181      * @param    delimiters  The delimiters to be used

182      */

183     public HeaderTokenizer(String header, String delimiters) {

184     this(header, delimiters, true);

185     }

186 

187     /**

188      * Constructor. The RFC822 defined delimiters - RFC822 - are

189      * used to delimit ATOMS. Also comments are skipped and not

190      * returned as tokens

191      *

192      * @param    header    the header string

193      */

194     public HeaderTokenizer(String header)  {

195     this(header, RFC822);

196     }

197 

198     /**

199      * Parses the next token from this String. <p>

200      *

201      * Clients sit in a loop calling next() to parse successive

202      * tokens until an EOF Token is returned.

203      *

204      * @return        the next Token

205      * @exception    ParseException if the parse fails

206      */

207     public Token next() throws ParseException { 

208     return next('\0', false);

209     }

210 

211     /**

212      * Parses the next token from this String.

213      * If endOfAtom is not NUL, the token extends until the

214      * endOfAtom character is seen, or to the end of the header.

215      * This method is useful when parsing headers that don't

216      * obey the MIME specification, e.g., by failing to quote

217      * parameter values that contain spaces.

218      *

219      * @param    endOfAtom    if not NUL, character marking end of token

220      * @return        the next Token

221      * @exception    ParseException if the parse fails

222      * @since        JavaMail 1.5

223      */

224     public Token next(char endOfAtom) throws ParseException { 

225     return next(endOfAtom, false);

226     }

227 

228     /**

229      * Parses the next token from this String.

230      * endOfAtom is handled as above.  If keepEscapes is true,

231      * any backslash escapes are preserved in the returned string.

232      * This method is useful when parsing headers that don't

233      * obey the MIME specification, e.g., by failing to escape

234      * backslashes in the filename parameter.

235      *

236      * @param    endOfAtom    if not NUL, character marking end of token

237      * @param    keepEscapes    keep all backslashes in returned string?

238      * @return        the next Token

239      * @exception    ParseException if the parse fails

240      * @since        JavaMail 1.5

241      */

242     public Token next(char endOfAtom, boolean keepEscapes)

243                 throws ParseException { 

244     Token tk;

245 

246     currentPos = nextPos; // setup currentPos

247     tk = getNext(endOfAtom, keepEscapes);

248     nextPos = peekPos = currentPos; // update currentPos and peekPos

249     return tk;

250     }

251 

252     /**

253      * Peek at the next token, without actually removing the token

254      * from the parse stream. Invoking this method multiple times

255      * will return successive tokens, until <code>next()</code> is

256      * called. <p>

257      *

258      * @return        the next Token

259      * @exception    ParseException if the parse fails

260      */

261     public Token peek() throws ParseException {

262     Token tk;

263 

264     currentPos = peekPos; // setup currentPos

265     tk = getNext('\0', false);

266     peekPos = currentPos; // update peekPos

267     return tk;

268     }

269 

270     /**

271      * Return the rest of the Header.

272      *

273      * @return String    rest of header. null is returned if we are

274      *            already at end of header

275      */

276     public String getRemainder() {

277     if (nextPos >= string.length())

278         return null;

279     return string.substring(nextPos);

280     }

281 

282     /*

283      * Return the next token starting from 'currentPos'. After the

284      * parse, 'currentPos' is updated to point to the start of the 

285      * next token.

286      */

287     private Token getNext(char endOfAtom, boolean keepEscapes)

288                 throws ParseException {

289     // If we're already at end of string, return EOF

290     if (currentPos >= maxPos)

291         return EOFToken;

292 

293     // Skip white-space, position currentPos beyond the space

294     if (skipWhiteSpace() == Token.EOF)

295         return EOFToken;

296 

297     char c; 

298     int start; 

299     boolean filter = false;

300     

301     c = string.charAt(currentPos);

302 

303     // Check or Skip comments and position currentPos

304     // beyond the comment

305     while (c == '(') {

306         // Parsing comment ..

307         int nesting;

308         for (start = ++currentPos, nesting = 1; 

309          nesting > 0 && currentPos < maxPos;

310          currentPos++) {

311         c = string.charAt(currentPos);

312         if (c == '\\') {  // Escape sequence

313             currentPos++; // skip the escaped character

314             filter = true;

315         } else if (c == '\r')

316             filter = true;

317         else if (c == '(')

318             nesting++;

319         else if (c == ')')

320             nesting--;

321         }

322         if (nesting != 0)

323         throw new ParseException("Unbalanced comments");

324 

325         if (!skipComments) {

326         // Return the comment, if we are asked to.

327         // Note that the comment start & end markers are ignored.

328         String s;

329         if (filter) // need to go thru the token again.

330             s = filterToken(string, start, currentPos-1, keepEscapes);

331         else

332             s = string.substring(start,currentPos-1);

333 

334         return new Token(Token.COMMENT, s);

335         }

336 

337         // Skip any whitespace after the comment.

338         if (skipWhiteSpace() == Token.EOF)

339         return EOFToken;

340         c = string.charAt(currentPos);

341     }

342 

343     // Check for quoted-string and position currentPos 

344     //  beyond the terminating quote

345     if (c == '"') {

346         currentPos++;    // skip initial quote

347         return collectString('"', keepEscapes);

348     }

349     

350     // Check for SPECIAL or CTL

351     if (c < 040 || c >= 0177 || delimiters.indexOf(c) >= 0) {

352         if (endOfAtom > 0 && c != endOfAtom) {

353         // not expecting a special character here,

354         // pretend it's a quoted string

355         return collectString(endOfAtom, keepEscapes);

356         }

357         currentPos++; // re-position currentPos

358         char ch[] = new char[1];

359         ch[0] = c;

360         return new Token((int)c, new String(ch));

361     }

362 

363     // Check for ATOM

364     for (start = currentPos; currentPos < maxPos; currentPos++) {

365         c = string.charAt(currentPos);

366         // ATOM is delimited by either SPACE, CTL, "(", <"> 

367         // or the specified SPECIALS

368         if (c < 040 || c >= 0177 || c == '(' || c == ' ' ||

369             c == '"' || delimiters.indexOf(c) >= 0) {

370         if (endOfAtom > 0 && c != endOfAtom) {

371             // not the expected atom after all;

372             // back up and pretend it's a quoted string

373             currentPos = start;

374             return collectString(endOfAtom, keepEscapes);

375         }

376         break;

377         }

378     }

379     return new Token(Token.ATOM, string.substring(start, currentPos));

380     }

381 

382     private Token collectString(char eos, boolean keepEscapes)

383                 throws ParseException {

384     int start;

385     boolean filter = false;

386     for (start = currentPos; currentPos < maxPos; currentPos++) {

387         char c = string.charAt(currentPos);

388         if (c == '\\') { // Escape sequence

389         currentPos++;

390         filter = true;

391         } else if (c == '\r')

392         filter = true;

393         else if (c == eos) {

394         currentPos++;

395         String s;

396 

397         if (filter)

398             s = filterToken(string, start, currentPos-1, keepEscapes);

399         else

400             s = string.substring(start, currentPos-1);

401 

402         if (c != '"') {        // not a real quoted string

403             s = trimWhiteSpace(s);

404             currentPos--;    // back up before the eos char

405         }

406 

407         return new Token(Token.QUOTEDSTRING, s);

408         }

409     }

410 

411     // ran off the end of the string

412 

413     // if we're looking for a matching quote, that's an error

414     if (eos == '"')

415         throw new ParseException("Unbalanced quoted string");

416 

417     // otherwise, just return whatever's left

418     String s;

419     if (filter)

420         s = filterToken(string, start, currentPos, keepEscapes);

421     else

422         s = string.substring(start, currentPos);

423     s = trimWhiteSpace(s);

424     return new Token(Token.QUOTEDSTRING, s);

425     }

426 

427     // Skip SPACE, HT, CR and NL

428     private int skipWhiteSpace() {

429     char c;

430     for (; currentPos < maxPos; currentPos++)

431         if (((c = string.charAt(currentPos)) != ' ') && 

432         (c != '\t') && (c != '\r') && (c != '\n'))

433         return currentPos;

434     return Token.EOF;

435     }

436 

437     // Trim SPACE, HT, CR and NL from end of string

438     private static String trimWhiteSpace(String s) {

439     char c;

440     int i;

441     for (i = s.length() - 1; i >= 0; i--) {

442         if (((c = s.charAt(i)) != ' ') && 

443         (c != '\t') && (c != '\r') && (c != '\n'))

444         break;

445     }

446     if (i <= 0)

447         return "";

448     else

449         return s.substring(0, i + 1);

450     }

451 

452     /* Process escape sequences and embedded LWSPs from a comment or

453      * quoted string.

454      */

455     private static String filterToken(String s, int start, int end,

456                 boolean keepEscapes) {

457     StringBuffer sb = new StringBuffer();

458     char c;

459     boolean gotEscape = false;

460     boolean gotCR = false;

461 

462     for (int i = start; i < end; i++) {

463         c = s.charAt(i);

464         if (c == '\n' && gotCR) {

465         // This LF is part of an unescaped 

466         // CRLF sequence (i.e, LWSP). Skip it.

467         gotCR = false;

468         continue;

469         }

470 

471         gotCR = false;

472         if (!gotEscape) {

473         // Previous character was NOT '\'

474         if (c == '\\') // skip this character

475             gotEscape = true;

476         else if (c == '\r') // skip this character

477             gotCR = true;

478         else // append this character

479             sb.append(c);

480         } else {

481         // Previous character was '\'. So no need to 

482         // bother with any special processing, just 

483         // append this character.  If keepEscapes is

484         // set, keep the backslash.  IE6 fails to escape

485         // backslashes in quoted strings in HTTP headers,

486         // e.g., in the filename parameter.

487         if (keepEscapes)

488             sb.append('\\');

489         sb.append(c);

490         gotEscape = false;

491         }

492     }

493     return sb.toString();

494     }

495 }

496