Monitoring JavaMelody on _ip-10-0-13-255.ec2.internal

1 /* Woodstox XML processor

2  *

3  * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi

4  *

5  * Licensed under the License specified in file LICENSE, included with

6  * the source code.

7  * You may not use this file except in compliance with the License.

8  *

9  * Unless required by applicable law or agreed to in writing, software

10  * distributed under the License is distributed on an "AS IS" BASIS,

11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12  * See the License for the specific language governing permissions and

13  * limitations under the License.

14  */

15 

16 package com.ctc.wstx.sr;

17 

18 import java.io.FileNotFoundException;

19 import java.io.IOException;

20 import java.net.URL;

21 import java.text.MessageFormat;

22 import java.util.Collections;

23 import java.util.HashMap;

24 import java.util.Map;

25 

26 import javax.xml.stream.Location;

27 import javax.xml.stream.XMLInputFactory;

28 import javax.xml.stream.XMLReporter;

29 import javax.xml.stream.XMLResolver;

30 import javax.xml.stream.XMLStreamException;

31 

32 import org.codehaus.stax2.XMLReporter2;

33 import org.codehaus.stax2.XMLStreamLocation2;

34 import org.codehaus.stax2.validation.XMLValidationProblem;

35 

36 import com.ctc.wstx.api.ReaderConfig;

37 import com.ctc.wstx.cfg.ErrorConsts;

38 import com.ctc.wstx.cfg.InputConfigFlags;

39 import com.ctc.wstx.cfg.ParsingErrorMsgs;

40 import com.ctc.wstx.cfg.XmlConsts;

41 import com.ctc.wstx.dtd.MinimalDTDReader;

42 import com.ctc.wstx.ent.EntityDecl;

43 import com.ctc.wstx.ent.IntEntity;

44 import com.ctc.wstx.exc.*;

45 import com.ctc.wstx.io.DefaultInputResolver;

46 import com.ctc.wstx.io.WstxInputData;

47 import com.ctc.wstx.io.WstxInputLocation;

48 import com.ctc.wstx.io.WstxInputSource;

49 import com.ctc.wstx.util.ExceptionUtil;

50 import com.ctc.wstx.util.SymbolTable;

51 import com.ctc.wstx.util.TextBuffer;

52 

53 /**

54  * Abstract base class that defines some basic functionality that all

55  * Woodstox reader classes (main XML reader, DTD reader) extend from.

56  */

57 public abstract class StreamScanner

58     extends WstxInputData

59     implements InputProblemReporter,

60         InputConfigFlags, ParsingErrorMsgs

61 {

62 

63     // // // Some well-known chars:

64 

65     /**

66      * Last (highest) char code of the three, LF, CR and NULL

67      */

68     public final static char CHAR_CR_LF_OR_NULL = (char) 13;

69 

70     public final static int INT_CR_LF_OR_NULL = 13;

71 

72     /**

73      * Character that allows quick check of whether a char can potentially

74      * be some kind of markup, WRT input stream processing;

75      * has to contain linefeeds, {@code &}, {@code <} and {@code >} (note: {@code >} only matters when

76      * quoting text, as part of {@code ]]>})

77      */

78     protected final static char CHAR_FIRST_PURE_TEXT = (char) ('>' + 1);

79 

80 

81     /**

82      * First character in Unicode (ie one with lowest id) that is legal

83      * as part of a local name (all valid name chars minus ':'). Used

84      * for doing quick check for local name end; usually name ends in

85      * a whitespace or equals sign.

86      */

87     protected final static char CHAR_LOWEST_LEGAL_LOCALNAME_CHAR = '-';

88 

89     /*

90     ///////////////////////////////////////////////////////////////////////

91     // Character validity constants, structs

92     ///////////////////////////////////////////////////////////////////////

93      */

94 

95     /**

96      * We will only use validity array for first 256 characters, mostly

97      * because after those characters it's easier to do fairly simple

98      * block checks.

99      */

100     private final static int VALID_CHAR_COUNT = 0x100;

101 

102     private final static byte NAME_CHAR_INVALID_B = (byte) 0;

103     private final static byte NAME_CHAR_ALL_VALID_B = (byte) 1;

104     private final static byte NAME_CHAR_VALID_NONFIRST_B = (byte) -1;

105 

106     private final static byte[] sCharValidity = new byte[VALID_CHAR_COUNT];

107 

108     static {

109         // First, since all valid-as-first chars are also valid-as-other chars,

110         // we'll initialize common chars:

111         sCharValidity['_'] = NAME_CHAR_ALL_VALID_B;

112         for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {

113             sCharValidity['A' + i] = NAME_CHAR_ALL_VALID_B;

114             sCharValidity['a' + i] = NAME_CHAR_ALL_VALID_B;

115         }

116         for (int i = 0xC0; i < 0xF6; ++i) { // not all are fully valid, but

117             sCharValidity[i] = NAME_CHAR_ALL_VALID_B;

118         }

119         // ... now we can 'revert' ones not fully valid:

120         sCharValidity[0xD7] = NAME_CHAR_INVALID_B;

121         sCharValidity[0xF7] = NAME_CHAR_INVALID_B;

122 

123         // And then we can proceed with ones only valid-as-other.

124         sCharValidity['-'] = NAME_CHAR_VALID_NONFIRST_B;

125         sCharValidity['.'] = NAME_CHAR_VALID_NONFIRST_B;

126         sCharValidity[0xB7] = NAME_CHAR_VALID_NONFIRST_B;

127         for (int i = '0'; i <= '9'; ++i) {

128             sCharValidity[i] = NAME_CHAR_VALID_NONFIRST_B;

129         }

130     }

131 

132     /**

133      * Public identifiers only use 7-bit ascii range.

134      */

135     private final static int VALID_PUBID_CHAR_COUNT = 0x80;

136     private final static byte[] sPubidValidity = new byte[VALID_PUBID_CHAR_COUNT];

137 //    private final static byte PUBID_CHAR_INVALID_B = (byte) 0;

138     private final static byte PUBID_CHAR_VALID_B = (byte) 1;

139     static {

140         for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {

141             sPubidValidity['A' + i] = PUBID_CHAR_VALID_B;

142             sPubidValidity['a' + i] = PUBID_CHAR_VALID_B;

143         }

144         for (int i = '0'; i <= '9'; ++i) {

145             sPubidValidity[i] = PUBID_CHAR_VALID_B;

146         }

147 

148         // 3 main white space types are valid

149         sPubidValidity[0x0A] = PUBID_CHAR_VALID_B;

150         sPubidValidity[0x0D] = PUBID_CHAR_VALID_B;

151         sPubidValidity[0x20] = PUBID_CHAR_VALID_B;

152 

153         // And many of punctuation/separator ascii chars too:

154         sPubidValidity['-'] = PUBID_CHAR_VALID_B;

155         sPubidValidity['\''] = PUBID_CHAR_VALID_B;

156         sPubidValidity['('] = PUBID_CHAR_VALID_B;

157         sPubidValidity[')'] = PUBID_CHAR_VALID_B;

158         sPubidValidity['+'] = PUBID_CHAR_VALID_B;

159         sPubidValidity[','] = PUBID_CHAR_VALID_B;

160         sPubidValidity['.'] = PUBID_CHAR_VALID_B;

161         sPubidValidity['/'] = PUBID_CHAR_VALID_B;

162         sPubidValidity[':'] = PUBID_CHAR_VALID_B;

163         sPubidValidity['='] = PUBID_CHAR_VALID_B;

164         sPubidValidity['?'] = PUBID_CHAR_VALID_B;

165         sPubidValidity[';'] = PUBID_CHAR_VALID_B;

166         sPubidValidity['!'] = PUBID_CHAR_VALID_B;

167         sPubidValidity['*'] = PUBID_CHAR_VALID_B;

168         sPubidValidity['#'] = PUBID_CHAR_VALID_B;

169         sPubidValidity['@'] = PUBID_CHAR_VALID_B;

170         sPubidValidity['$'] = PUBID_CHAR_VALID_B;

171         sPubidValidity['_'] = PUBID_CHAR_VALID_B;

172         sPubidValidity['%'] = PUBID_CHAR_VALID_B;

173     }

174 

175     /*

176     ///////////////////////////////////////////////////////////////////////

177     // Basic configuration

178     ///////////////////////////////////////////////////////////////////////

179      */

180 

181     /**

182      * Copy of the configuration object passed by the factory.

183      * Contains immutable settings for this reader (or in case

184      * of DTD parsers, reader that uses it)

185      */

186     protected final ReaderConfig mConfig;

187 

188     // // // Various extracted settings:

189 

190     /**

191      * If true, Reader is namespace aware, and should do basic checks

192      * (usually enforcing limitations on having colons in names)

193      */

194     protected final boolean mCfgNsEnabled;

195 

196     // Extracted standard on/off settings:

197 

198     /**

199      * note: left non-final on purpose: sub-class may need to modify

200      * the default value after construction.

201      */

202     protected boolean mCfgReplaceEntities;

203 

204     /*

205     ///////////////////////////////////////////////////////////////////////

206     // Symbol handling, if applicable

207     ///////////////////////////////////////////////////////////////////////

208      */

209 

210     final SymbolTable mSymbols;

211 

212     /**

213      * Local full name for the event, if it has one (note: element events

214      * do NOT use this variable; those names are stored in element stack):

215      * target for processing instructions.

216      *<p>

217      * Currently used for proc. instr. target, and entity name (at least

218      * when current entity reference is null).

219      *<p>

220      * Note: this variable is generally not cleared, since it comes from

221      * a symbol table, ie. this won't be the only reference.

222      */

223     protected String mCurrName;

224 

225     /*

226     ///////////////////////////////////////////////////////////////////////

227     // Input handling

228     ///////////////////////////////////////////////////////////////////////

229      */

230 

231     /**

232      * Currently active input source; contains link to parent (nesting) input

233      * sources, if any.

234      */

235     protected WstxInputSource mInput;

236 

237     /**

238      * Top-most input source this reader can use; due to input source

239      * chaining, this is not necessarily the root of all input; for example,

240      * external DTD subset reader's root input still has original document

241      * input as its parent.

242      */

243     protected final WstxInputSource mRootInput;

244 

245     /**

246      * Custom resolver used to handle external entities that are to be expanded

247      * by this reader (external param/general entity expander)

248      */

249     protected XMLResolver mEntityResolver = null;

250 

251     /**

252      * This is the current depth of the input stack (same as what input

253      * element stack would return as its depth).

254      * It is used to enforce input scope constraints for nesting of

255      * elements (for xml reader) and dtd declaration (for dtd reader)

256      * with regards to input block (entity expansion) boundaries.

257      *<p>

258      * Basically this value is compared to {@link #mInputTopDepth}, which

259      * indicates what was the depth at the point where the currently active

260      * input scope/block was started.

261      */

262     protected int mCurrDepth;

263 

264     protected int mInputTopDepth;

265 

266     /**

267      * Number of times a parsed general entity has been expanded; used for

268      * (optionally) limiting number of expansion to guard against

269      * denial-of-service attacks like "Billion Laughs".

270      * 

271      * @since 4.3

272      */

273     protected int mEntityExpansionCount;

274     

275     /**

276      * Flag that indicates whether linefeeds in the input data are to

277      * be normalized or not.

278      * Xml specs mandate that the line feeds are only normalized

279      * when they are from the external entities (main doc, external

280      * general/parsed entities), so normalization has to be

281      * suppressed when expanding internal general/parsed entities.

282      */

283     protected boolean mNormalizeLFs;

284 

285     /**

286      * Flag that indicates whether all escaped chars are accepted in XML 1.0.

287      *

288      * @since 5.2

289      */

290     protected boolean mAllowXml11EscapedCharsInXml10;

291 

292     /*

293     ///////////////////////////////////////////////////////////////////////

294     // Buffer(s) for local name(s) and text content

295     ///////////////////////////////////////////////////////////////////////

296      */

297 

298     /**

299      * Temporary buffer used if local name can not be just directly

300      * constructed from input buffer (name is on a boundary or such).

301      */

302     protected char[] mNameBuffer = null;

303 

304     /*

305     ///////////////////////////////////////////////////////////////////////

306     // Information about starting location of event

307     // Reader is pointing to; updated on-demand

308     ///////////////////////////////////////////////////////////////////////

309      */

310 

311     // // // Location info at point when current token was started

312 

313     /**

314      * Total number of characters read before start of current token.

315      * Since big (gigabyte-sized) inputs are possible, this needs to be a long,

316      * unlike pointers and sizes related to in-memory buffers.

317      */

318     protected long mTokenInputTotal = 0; 

319 

320     /**

321      * Input row on which current token starts, 1-based

322      */

323     protected int mTokenInputRow = 1;

324 

325     /**

326      * Column on input row that current token starts; 0-based (although

327      * in the end it'll be converted to 1-based)

328      */

329     protected int mTokenInputCol = 0;

330 

331     /*

332     ///////////////////////////////////////////////////////////////////////

333     // XML document information (from doc decl if one was found) common to

334     // all entities (main xml document, external DTD subset)

335     ///////////////////////////////////////////////////////////////////////

336      */

337 

338     /**

339      * Input stream encoding, if known (passed in, or determined by

340      * auto-detection); null if not.

341      */

342     protected String mDocInputEncoding = null;

343 

344     /**

345      * Character encoding from xml declaration, if any; null if no

346      * declaration, or it didn't specify encoding.

347      */

348     protected String mDocXmlEncoding = null;

349 

350     /**

351      * XML version as declared by the document; one of constants

352      * from {@link XmlConsts} (like {@link XmlConsts#XML_V_10}).

353      */

354     protected int mDocXmlVersion = XmlConsts.XML_V_UNKNOWN;

355     

356     /**

357      * Cache of internal character entities;

358      */

359     protected Map<String,IntEntity> mCachedEntities;

360     

361     /**

362      * Flag for whether or not character references should be treated as entities

363      */

364     protected boolean mCfgTreatCharRefsAsEntities;

365     

366     /**

367      * Entity reference stream currently points to.

368      */

369     protected EntityDecl mCurrEntity;

370 

371     /*

372     ///////////////////////////////////////////////////////////////////////

373     // Life-cycle

374     ///////////////////////////////////////////////////////////////////////

375      */

376 

377     /**

378      * Constructor used when creating a complete new (main-level) reader that

379      * does not share its input buffers or state with another reader.

380      */

381     protected StreamScanner(WstxInputSource input, ReaderConfig cfg,

382                             XMLResolver res)

383     {

384         super();

385         mInput = input;

386         // 17-Jun-2004, TSa: Need to know root-level input source

387         mRootInput = input;

388 

389         mConfig = cfg;

390         mSymbols = cfg.getSymbols();

391         int cf = cfg.getConfigFlags();

392         mCfgNsEnabled = (cf & CFG_NAMESPACE_AWARE) != 0;

393         mCfgReplaceEntities = (cf & CFG_REPLACE_ENTITY_REFS) != 0;

394 

395         mAllowXml11EscapedCharsInXml10 = mConfig.willAllowXml11EscapedCharsInXml10();

396 

397         mNormalizeLFs = mConfig.willNormalizeLFs();

398         mInputBuffer = null;

399         mInputPtr = mInputEnd = 0;

400         mEntityResolver = res;

401         

402         mCfgTreatCharRefsAsEntities = mConfig.willTreatCharRefsAsEnts();

403         if (mCfgTreatCharRefsAsEntities) {

404             mCachedEntities = new HashMap<String,IntEntity>();

405         } else {

406             mCachedEntities = Collections.emptyMap();

407         }

408     }

409 

410     /**

411      * @since 5.2

412      */

413     public ReaderConfig getConfig() {

414         return mConfig;

415     }

416 

417     /*

418     ///////////////////////////////////////////////////////////////////////

419     // Package API

420     ///////////////////////////////////////////////////////////////////////

421      */

422 

423     /**

424      * Method that returns location of the last character returned by this

425      * reader; that is, location "one less" than the currently pointed to

426      * location.

427      */

428     protected WstxInputLocation getLastCharLocation()

429     {

430         return mInput.getLocation(mCurrInputProcessed + mInputPtr - 1,

431                 mCurrInputRow, mInputPtr - mCurrInputRowStart);

432     }

433 

434     protected URL getSource() throws IOException {

435         return mInput.getSource();

436     }

437 

438     protected String getSystemId() {

439         return mInput.getSystemId();

440     }

441 

442     /*

443     ///////////////////////////////////////////////////////////////////////

444     // Partial `LocationInfo` implementation (not implemented

445     // by this base class, but is by some sub-classes)

446     ///////////////////////////////////////////////////////////////////////

447      */

448 

449     /**

450      * Returns location of last properly parsed token; as per StAX specs,

451      * apparently needs to be the end of current event, which is the same

452      * as the start of the following event (or EOF if that's next).

453      */

454     @Override

455     public abstract Location getLocation();

456 

457     public XMLStreamLocation2 getStartLocation()

458     {

459         // note: +1 is used as columns are 1-based...

460         return mInput.getLocation(mTokenInputTotal,

461                 mTokenInputRow, mTokenInputCol + 1);

462     }

463 

464     public XMLStreamLocation2 getCurrentLocation()

465     {

466         return mInput.getLocation(mCurrInputProcessed + mInputPtr,

467                 mCurrInputRow, mInputPtr - mCurrInputRowStart + 1);

468     }

469 

470     /*

471     ///////////////////////////////////////////////////////////////////////

472     // InputProblemReporter implementation

473     ///////////////////////////////////////////////////////////////////////

474      */

475 

476     public WstxException throwWfcException(String msg, boolean deferErrors)

477         throws WstxException

478     {

479         WstxException ex = constructWfcException(msg);

480         if (!deferErrors) {

481             throw ex;

482         }

483         return ex;

484     }

485 

486     @Override

487     public void throwParseError(String msg) throws XMLStreamException {

488         throwParseError(msg, null, null);

489     }

490 

491     /**

492      * Throws generic parse error with specified message and current parsing

493      * location.

494      *<p>

495      * Note: public access only because core code in other packages needs

496      * to access it.

497      */

498     @Override

499     public void throwParseError(String format, Object arg, Object arg2)

500         throws XMLStreamException

501     {

502         String msg = (arg == null && arg2 == null) ? format

503                 : MessageFormat.format(format, new Object[] { arg, arg2 });

504         throw constructWfcException(msg);

505     }

506 

507     public void reportProblem(String probType, String format, Object arg, Object arg2)

508         throws XMLStreamException

509     {

510         XMLReporter rep = mConfig.getXMLReporter();

511         if (rep != null) {

512             _reportProblem(rep, probType,

513                             MessageFormat.format(format, new Object[] { arg, arg2 }), null);

514         }

515     }

516 

517     @Override

518     public void reportProblem(Location loc, String probType,

519                               String format, Object arg, Object arg2)

520         throws XMLStreamException

521     {

522         XMLReporter rep = mConfig.getXMLReporter();

523         if (rep != null) {

524             String msg = (arg != null || arg2 != null) ?

525                 MessageFormat.format(format, new Object[] { arg, arg2 }) : format;

526             _reportProblem(rep, probType, msg, loc);

527         }

528     }

529 

530     protected void _reportProblem(XMLReporter rep, String probType, String msg, Location loc)

531         throws XMLStreamException

532     {

533         if (loc == null) {

534             loc = getLastCharLocation();

535         }

536         _reportProblem(rep, new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_ERROR, probType));

537     }

538 

539     protected void _reportProblem(XMLReporter rep, XMLValidationProblem prob)

540         throws XMLStreamException

541     {

542         if (rep != null) {

543             Location loc = prob.getLocation();

544             if (loc == null) {

545                 loc = getLastCharLocation();

546                 prob.setLocation(loc);

547             }

548             // Backwards-compatibility fix: add non-null type, if missing:

549             if (prob.getType() == null) {

550                 prob.setType(ErrorConsts.WT_VALIDATION);

551             }

552             // [WSTX-154]: was catching and dropping thrown exception: shouldn't.

553             // [WTSX-157]: need to support XMLReporter2

554             if (rep instanceof XMLReporter2) {

555                 ((XMLReporter2) rep).report(prob);

556             } else {

557                 rep.report(prob.getMessage(), prob.getType(), prob, loc);

558             }

559         }

560     }

561 

562     /**

563      *<p>

564      * Note: this is the base implementation used for implementing

565      * <code>ValidationContext</code>

566      */

567     @Override

568     public void reportValidationProblem(XMLValidationProblem prob)

569         throws XMLStreamException

570     {

571         // !!! TBI: Fail-fast vs. deferred modes?

572         /* For now let's implement basic functionality: warnings get

573          * reported via XMLReporter, errors and fatal errors result in

574          * immediate exceptions.

575          */

576         /* 27-May-2008, TSa: [WSTX-153] Above is incorrect: as per Stax

577          *   javadocs for XMLReporter, both warnings and non-fatal errors

578          *   (which includes all validation errors) should be reported via

579          *   XMLReporter interface, and only fatals should cause an

580          *   immediate stream exception (by-passing reporter)

581          */

582         if (prob.getSeverity() > XMLValidationProblem.SEVERITY_ERROR) {

583             throw WstxValidationException.create(prob);

584         }

585         XMLReporter rep = mConfig.getXMLReporter();

586         if (rep != null) {

587             _reportProblem(rep, prob);

588         } else {

589             /* If no reporter, regular non-fatal errors are to be reported

590              * as exceptions as well, for backwards compatibility

591              */

592             if (prob.getSeverity() >= XMLValidationProblem.SEVERITY_ERROR) {

593                 throw WstxValidationException.create(prob);

594             }

595         }

596     }

597 

598     public void reportValidationProblem(String msg, int severity)

599         throws XMLStreamException

600     {

601         reportValidationProblem(new XMLValidationProblem(getLastCharLocation(),

602                                                          msg, severity));

603     }

604 

605     @Override

606     public void reportValidationProblem(String msg)

607         throws XMLStreamException

608     {

609         reportValidationProblem(new XMLValidationProblem(getLastCharLocation(), msg,

610                 XMLValidationProblem.SEVERITY_ERROR));

611     }

612 

613     public void reportValidationProblem(Location loc, String msg)

614         throws XMLStreamException

615     {

616         reportValidationProblem(new XMLValidationProblem(loc, msg));

617     }

618 

619     @Override

620     public void reportValidationProblem(String format, Object arg, Object arg2)

621         throws XMLStreamException

622     {

623         reportValidationProblem(MessageFormat.format(format, new Object[] { arg, arg2 }));

624     }

625 

626     /*

627     ///////////////////////////////////////////////////////////////////////

628     // Other error reporting methods

629     ///////////////////////////////////////////////////////////////////////

630      */

631 

632     protected WstxException constructWfcException(String msg)

633     {

634         return new WstxParsingException(msg, getLastCharLocation());

635     }

636 

637     /**

638      * Construct and return a {@link XMLStreamException} to throw

639      * as a result of a failed Typed Access operation (but one not

640      * caused by a Well-Formedness Constraint or Validation Constraint

641      * problem)

642      */

643     /*

644     protected WstxException _constructTypeException(String msg)

645     {

646         // Hmmh. Should there be a distinct sub-type?

647         return new WstxParsingException(msg, getLastCharLocation());

648     }

649     */

650 

651     protected WstxException constructFromIOE(IOException ioe)

652     {

653         return new WstxIOException(ioe);

654     }

655 

656     protected WstxException constructNullCharException()

657     {

658         return new WstxUnexpectedCharException("Illegal character (NULL, unicode 0) encountered: not valid in any content",

659                 getLastCharLocation(), CHAR_NULL);

660     }

661 

662     protected void throwUnexpectedChar(int i, String msg) throws WstxException

663     {

664         char c = (char) i;

665         String excMsg = "Unexpected character "+getCharDesc(c)+msg;

666         throw new WstxUnexpectedCharException(excMsg, getLastCharLocation(), c);

667     }

668 

669     protected void throwNullChar() throws WstxException {

670         throw constructNullCharException();

671     }

672 

673     protected void throwInvalidSpace(int i) throws WstxException {

674         throwInvalidSpace(i, false);

675     }

676 

677     protected WstxException throwInvalidSpace(int i, boolean deferErrors)

678         throws WstxException

679     {

680         char c = (char) i;

681         WstxException ex;

682         if (c == CHAR_NULL) {

683             ex = constructNullCharException();

684         } else {

685             String msg = "Illegal character ("+getCharDesc(c)+")";

686             if (mXml11) {

687                 msg += " [note: in XML 1.1, it could be included via entity expansion]";

688             }

689             ex = new WstxUnexpectedCharException(msg, getLastCharLocation(), c);

690         }

691         if (!deferErrors) {

692             throw ex;

693         }

694         return ex;

695     }

696 

697     protected void throwUnexpectedEOF(String msg)

698         throws WstxException

699     {

700         throw new WstxEOFException("Unexpected EOF"+(msg == null ? "" : msg),

701                 getLastCharLocation());

702     }

703 

704     /**

705      * Similar to {@link #throwUnexpectedEOF}, but only indicates ending

706      * of an input block. Used when reading a token that can not span

707      * input block boundaries (ie. can not continue past end of an

708      * entity expansion).

709      */

710     protected void throwUnexpectedEOB(String msg)

711         throws WstxException

712     {

713         throw new WstxEOFException("Unexpected end of input block"+(msg == null ? "" : msg),

714                 getLastCharLocation());

715     }

716 

717     protected void throwFromIOE(IOException ioe) throws WstxException {

718         throw new WstxIOException(ioe);

719     }

720 

721     protected void throwFromStrE(XMLStreamException strex)

722             throws WstxException

723     {

724         if (strex instanceof WstxException) {

725             throw (WstxException) strex;

726         }

727         throw new WstxException(strex);

728     }

729 

730     /**

731      * Method called to report an error, when caller's signature only

732      * allows runtime exceptions to be thrown.

733      */

734     protected void throwLazyError(Exception e)

735     {

736         if (e instanceof XMLStreamException) {

737             WstxLazyException.throwLazily((XMLStreamException) e);

738         }

739         ExceptionUtil.throwRuntimeException(e);

740     }

741 

742     protected String tokenTypeDesc(int type) {

743         return ErrorConsts.tokenTypeDesc(type);

744     }

745 

746     /*

747     ///////////////////////////////////////////////////////////////////////

748     // Input buffer handling

749     ///////////////////////////////////////////////////////////////////////

750      */

751 

752     /**

753      * Returns current input source this source uses.

754      *<p>

755      * Note: public only because some implementations are on different

756      * package.

757      */

758     public final WstxInputSource getCurrentInput() {

759         return mInput;

760     }

761 

762     protected final int inputInBuffer() {

763         return mInputEnd - mInputPtr;

764     }

765 

766     @SuppressWarnings("cast")

767     protected final int getNext() throws XMLStreamException

768     {

769         if (mInputPtr >= mInputEnd) {

770             if (!loadMore()) {

771                 return -1;

772             }

773         }

774         return (int) mInputBuffer[mInputPtr++];

775     }

776 

777     /**

778      * Similar to {@link #getNext}, but does not advance pointer

779      * in input buffer.

780      *<p>

781      * Note: this method only peeks within current input source;

782      * it does not close it and check nested input source (if any).

783      * This is necessary when checking keywords, since they can never

784      * cross input block boundary.

785      */

786     @SuppressWarnings("cast")

787     protected final int peekNext()

788         throws XMLStreamException

789     {

790         if (mInputPtr >= mInputEnd) {

791             if (!loadMoreFromCurrent()) {

792                 return -1;

793             }

794         }

795         return (int) mInputBuffer[mInputPtr];

796     }

797 

798     protected final char getNextChar(String errorMsg)

799         throws XMLStreamException

800     {

801         if (mInputPtr >= mInputEnd) {

802             loadMore(errorMsg);

803         }

804         return mInputBuffer[mInputPtr++];

805     }

806 

807     /**

808      * Similar to {@link #getNextChar}, but will not read more characters

809      * from parent input source(s) if the current input source doesn't

810      * have more content. This is often needed to prevent "runaway" content,

811      * such as comments that start in an entity but do not have matching

812      * close marker inside entity; XML specification specifically states

813      * such markup is not legal.

814      */

815     protected final char getNextCharFromCurrent(String errorMsg)

816         throws XMLStreamException

817     {

818         if (mInputPtr >= mInputEnd) {

819             loadMoreFromCurrent(errorMsg);

820         }

821         return mInputBuffer[mInputPtr++];

822     }

823 

824     /**

825      * Method that will skip through zero or more white space characters,

826      * and return either the character following white space, or -1 to

827      * indicate EOF (end of the outermost input source)/

828      */

829     @SuppressWarnings("cast")

830     protected final int getNextAfterWS()

831         throws XMLStreamException

832     {

833         if (mInputPtr >= mInputEnd) {

834             if (!loadMore()) {

835                 return -1;

836             }

837         }

838         char c = mInputBuffer[mInputPtr++];

839         while (c <= CHAR_SPACE) {

840             // Linefeed?

841             if (c == '\n' || c == '\r') {

842                 skipCRLF(c);

843             } else if (c != CHAR_SPACE && c != '\t') {

844                 throwInvalidSpace(c);

845             }

846             // Still a white space?

847             if (mInputPtr >= mInputEnd) {

848                 if (!loadMore()) {

849                     return -1;

850                 }

851             }

852             c = mInputBuffer[mInputPtr++];

853         }

854         return (int) c;

855     }

856 

857     protected final char getNextCharAfterWS(String errorMsg)

858         throws XMLStreamException

859     {

860         if (mInputPtr >= mInputEnd) {

861             loadMore(errorMsg);

862         }

863 

864         char c = mInputBuffer[mInputPtr++];

865         while (c <= CHAR_SPACE) {

866             // Linefeed?

867             if (c == '\n' || c == '\r') {

868                 skipCRLF(c);

869             } else if (c != CHAR_SPACE && c != '\t') {

870                 throwInvalidSpace(c);

871             }

872 

873             // Still a white space?

874             if (mInputPtr >= mInputEnd) {

875                 loadMore(errorMsg);

876             }

877             c = mInputBuffer[mInputPtr++];

878         }

879         return c;

880     }

881 

882     protected final char getNextInCurrAfterWS(String errorMsg)

883         throws XMLStreamException

884     {

885         return getNextInCurrAfterWS(errorMsg, getNextCharFromCurrent(errorMsg));

886     }

887 

888     protected final char getNextInCurrAfterWS(String errorMsg, char c)

889         throws XMLStreamException

890     {

891         while (c <= CHAR_SPACE) {

892             // Linefeed?

893             if (c == '\n' || c == '\r') {

894                 skipCRLF(c);

895             } else if (c != CHAR_SPACE && c != '\t') {

896                 throwInvalidSpace(c);

897             }

898 

899             // Still a white space?

900             if (mInputPtr >= mInputEnd) {

901                 loadMoreFromCurrent(errorMsg);

902             }

903             c = mInputBuffer[mInputPtr++];

904         }

905         return c;

906     }

907 

908     /**

909      * Method called when a CR has been spotted in input; checks if next

910      * char is LF, and if so, skips it. Note that next character has to

911      * come from the current input source, to qualify; it can never come

912      * from another (nested) input source.

913      *

914      * @return True, if passed in char is '\r' and next one is '\n'.

915      */

916     protected final boolean skipCRLF(char c) 

917         throws XMLStreamException

918     {

919         boolean result;

920 

921         if (c == '\r' && peekNext() == '\n') {

922             ++mInputPtr;

923             result = true;

924         } else {

925             result = false;

926         }

927         ++mCurrInputRow;

928         mCurrInputRowStart = mInputPtr;

929         return result;

930     }

931 

932     protected final void markLF() {

933         ++mCurrInputRow;

934         mCurrInputRowStart = mInputPtr;

935     }

936 

937     protected final void markLF(int inputPtr) {

938         ++mCurrInputRow;

939         mCurrInputRowStart = inputPtr;

940     }

941 

942     /**

943      * Method to push back last character read; can only be called once,

944      * that is, no more than one char can be guaranteed to be succesfully

945      * returned.

946      */

947     protected final void pushback() { --mInputPtr; }

948 

949     /*

950     ///////////////////////////////////////////////////////////////////////

951     // Sub-class overridable input handling methods

952     ///////////////////////////////////////////////////////////////////////

953      */

954 

955     /**

956      * Method called when an entity has been expanded (new input source

957      * has been created). Needs to initialize location information and change

958      * active input source.

959      *

960      * @param entityId Name of the entity being expanded

961      */

962     protected void initInputSource(WstxInputSource newInput, boolean isExt,

963                                    String entityId)

964         throws XMLStreamException

965     {

966         // Let's make sure new input will be read next time input is needed:

967         mInputPtr = 0;

968         mInputEnd = 0;

969         /* Plus, reset the input location so that'll be accurate for

970          * error reporting etc.

971          */

972         mInputTopDepth = mCurrDepth;

973 

974         // [WSTX-296]: Check for entity expansion depth against configurable limit

975         int entityDepth = mInput.getEntityDepth() + 1;

976         verifyLimit("Maximum entity expansion depth", mConfig.getMaxEntityDepth(), entityDepth);

977         mInput = newInput;

978         mInput.initInputLocation(this, mCurrDepth, entityDepth);

979 

980         /* 21-Feb-2006, TSa: Linefeeds are NOT normalized when expanding

981          *   internal entities (XML, 2.11)

982          */

983         if (isExt) {

984             mNormalizeLFs = true;

985         } else {

986             mNormalizeLFs = false;

987         }

988     }

989 

990     /**

991      * Method that will try to read one or more characters from currently

992      * open input sources; closing input sources if necessary.

993      *

994      * @return true if reading succeeded (or may succeed), false if

995      *   we reached EOF.

996      */

997     protected boolean loadMore()

998         throws XMLStreamException

999     {

1000         WstxInputSource input = mInput;

1001         do {

1002             /* Need to make sure offsets are properly updated for error

1003              * reporting purposes, and do this now while previous amounts

1004              * are still known.

1005              */

1006             mCurrInputProcessed += mInputEnd;

1007             verifyLimit("Maximum document characters", mConfig.getMaxCharacters(), mCurrInputProcessed);

1008             mCurrInputRowStart -= mInputEnd;

1009             int count;

1010             try {

1011                 count = input.readInto(this);

1012                 if (count > 0) {

1013                     return true;

1014                 }

1015                 input.close();

1016             } catch (IOException ioe) {

1017                 throw constructFromIOE(ioe);

1018             }

1019             if (input == mRootInput) {

1020                 /* Note: no need to check entity/input nesting in this

1021                  * particular case, since it will be handled by higher level

1022                  * parsing code (results in an unexpected EOF)

1023                  */

1024                 return false;

1025             }

1026             WstxInputSource parent = input.getParent();

1027             if (parent == null) { // sanity check!

1028                 throwNullParent(input);

1029             }

1030             /* 13-Feb-2006, TSa: Ok, do we violate a proper nesting constraints

1031              *   with this input block closure?

1032              */

1033             if (mCurrDepth != input.getScopeId()) {

1034                 handleIncompleteEntityProblem(input);

1035             }

1036 

1037             mInput = input = parent;

1038             input.restoreContext(this);

1039             mInputTopDepth = input.getScopeId();

1040             /* 21-Feb-2006, TSa: Since linefeed normalization needs to be

1041              *   suppressed for internal entity expansion, we may need to

1042              *   change the state...

1043              */

1044             if (!mNormalizeLFs) {

1045                 mNormalizeLFs = !input.fromInternalEntity();

1046             }

1047             // Maybe there are leftovers from that input in buffer now?

1048         } while (mInputPtr >= mInputEnd);

1049 

1050         return true;

1051     }

1052 

1053     protected final boolean loadMore(String errorMsg)

1054         throws XMLStreamException

1055     {

1056         if (!loadMore()) {

1057             throwUnexpectedEOF(errorMsg);

1058         }

1059         return true;

1060     }

1061 

1062     protected boolean loadMoreFromCurrent()

1063         throws XMLStreamException

1064     {

1065         // Need to update offsets properly

1066         mCurrInputProcessed += mInputEnd;

1067         mCurrInputRowStart -= mInputEnd;

1068         verifyLimit("Maximum document characters", mConfig.getMaxCharacters(), mCurrInputProcessed);

1069         try {

1070             int count = mInput.readInto(this);

1071             return (count > 0);

1072         } catch (IOException ie) {

1073             throw constructFromIOE(ie);

1074         }

1075     }

1076 

1077     protected final boolean loadMoreFromCurrent(String errorMsg)

1078         throws XMLStreamException

1079     {

1080         if (!loadMoreFromCurrent()) {

1081             throwUnexpectedEOB(errorMsg);

1082         }

1083         return true;

1084     }

1085 

1086     /**

1087      * Method called to make sure current main-level input buffer has at

1088      * least specified number of characters available consequtively,

1089      * without having to call {@link #loadMore}. It can only be called

1090      * when input comes from main-level buffer; further, call can shift

1091      * content in input buffer, so caller has to flush any data still

1092      * pending. In short, caller has to know exactly what it's doing. :-)

1093      *<p>

1094      * Note: method does not check for any other input sources than the

1095      * current one -- if current source can not fulfill the request, a

1096      * failure is indicated.

1097      *

1098      * @return true if there's now enough data; false if not (EOF)

1099      */

1100     protected boolean ensureInput(int minAmount)

1101         throws XMLStreamException

1102     {

1103         int currAmount = mInputEnd - mInputPtr;

1104         if (currAmount >= minAmount) {

1105             return true;

1106         }

1107         try {

1108             return mInput.readMore(this, minAmount);

1109         } catch (IOException ie) {

1110             throw constructFromIOE(ie);

1111         }

1112     }

1113 

1114     protected void closeAllInput(boolean force)

1115         throws XMLStreamException

1116     {

1117         WstxInputSource input = mInput;

1118         while (true) {

1119             try {

1120                 if (force) {

1121                     input.closeCompletely();

1122                 } else {

1123                     input.close();

1124                 }

1125             } catch (IOException ie) {

1126                 throw constructFromIOE(ie);

1127             }

1128             if (input == mRootInput) {

1129                 break;

1130             }

1131             WstxInputSource parent = input.getParent();

1132             if (parent == null) { // sanity check!

1133                 throwNullParent(input);

1134             }

1135             mInput = input = parent;

1136         }

1137     }

1138 

1139     /**

1140      * @param curr Input source currently in use 

1141      */

1142     protected void throwNullParent(WstxInputSource curr)

1143     {

1144         throw new IllegalStateException(ErrorConsts.ERR_INTERNAL);

1145         //throw new IllegalStateException("Internal error: null parent for input source '"+curr+"'; should never occur (should have stopped at root input '"+mRootInput+"').");

1146     }

1147 

1148     /*

1149     ///////////////////////////////////////////////////////////////////////

1150     // Entity resolution

1151     ///////////////////////////////////////////////////////////////////////

1152      */

1153 

1154     /**

1155      * Method that tries to resolve a character entity, or (if caller so

1156      * specifies), a pre-defined internal entity (lt, gt, amp, apos, quot).

1157      * It will succeed iff:

1158      * <ol>

1159      *  <li>Entity in question is a simple character entity (either one of

1160      *    5 pre-defined ones, or using decimal/hex notation), AND

1161      *   <li>

1162      *  <li>Entity fits completely inside current input buffer.

1163      *   <li>

1164      * </ol>

1165      * If so, character value of entity is returned. Character 0 is returned

1166      * otherwise; if so, caller needs to do full resolution.

1167      *<p>

1168      * Note: On entry we are guaranteed there are at least 3 more characters

1169      * in this buffer; otherwise we shouldn't be called.

1170      *

1171      * @param checkStd If true, will check pre-defined internal entities

1172      *   (gt, lt, amp, apos, quot); if false, will only check actual

1173      *   character entities.

1174      *

1175      * @return (Valid) character value, if entity is a character reference,

1176      *   and could be resolved from current input buffer (does not span

1177      *   buffer boundary); null char (code 0) if not (either non-char

1178      *   entity, or spans input buffer boundary).

1179      */

1180     protected int resolveSimpleEntity(boolean checkStd)

1181         throws XMLStreamException

1182     {

1183         char[] buf = mInputBuffer;

1184         int ptr = mInputPtr;

1185         char c = buf[ptr++];

1186 

1187         // Numeric reference?

1188         if (c == '#') {

1189             c = buf[ptr++];

1190             int value = 0;

1191             int inputLen = mInputEnd;

1192             if (c == 'x') { // hex

1193                 while (ptr < inputLen) {

1194                     c = buf[ptr++];

1195                     if (c == ';') {

1196                         break;

1197                     }

1198                     value = value << 4;

1199                     if (c <= '9' && c >= '0') {

1200                         value += (c - '0');

1201                     } else if (c >= 'a' && c <= 'f') {

1202                         value += (10 + (c - 'a'));

1203                     } else if (c >= 'A' && c <= 'F') {

1204                         value += (10 + (c - 'A'));

1205                     } else {

1206                         mInputPtr = ptr; // so error points to correct char

1207                         throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F).");

1208                     }

1209                     /* Need to check for overflow; easiest to do right as

1210                      * it happens...

1211                      */

1212                     if (value > MAX_UNICODE_CHAR) {

1213                         reportUnicodeOverflow();

1214                     }

1215                 }

1216             } else { // numeric (decimal)

1217                 while (c != ';') {

1218                     if (c <= '9' && c >= '0') {

1219                         value = (value * 10) + (c - '0');

1220                         // Overflow?

1221                         if (value > MAX_UNICODE_CHAR) {

1222                             reportUnicodeOverflow();

1223                         }

1224                     } else {

1225                         mInputPtr = ptr; // so error points to correct char

1226                         throwUnexpectedChar(c, "; expected a decimal number.");

1227                     }

1228                     if (ptr >= inputLen) {

1229                         break;

1230                     }

1231                     c = buf[ptr++];

1232                 }

1233             }

1234             /* We get here either if we got it all, OR if we ran out of

1235              * input in current buffer.

1236              */

1237             if (c == ';') { // got the full thing

1238                 mInputPtr = ptr;

1239                 validateChar(value);

1240                 return value;

1241             }

1242 

1243             /* If we ran out of input, need to just fall back, gets

1244              * resolved via 'full' resolution mechanism.

1245              */

1246         } else if (checkStd) {

1247             /* Caller may not want to resolve these quite yet...

1248              * (when it wants separate events for non-char entities)

1249              */

1250             if (c == 'a') { // amp or apos?

1251                 c = buf[ptr++];

1252                 

1253                 if (c == 'm') { // amp?

1254                     if (buf[ptr++] == 'p') {

1255                         if (ptr < mInputEnd && buf[ptr++] == ';') {

1256                             mInputPtr = ptr;

1257                             return '&';

1258                         }

1259                     }

1260                 } else if (c == 'p') { // apos?

1261                     if (buf[ptr++] == 'o') {

1262                         int len = mInputEnd;

1263                         if (ptr < len && buf[ptr++] == 's') {

1264                             if (ptr < len && buf[ptr++] == ';') {

1265                                 mInputPtr = ptr;

1266                                 return '\'';

1267                             }

1268                         }

1269                     }

1270                 }

1271             } else if (c == 'g') { // gt?

1272                 if (buf[ptr++] == 't' && buf[ptr++] == ';') {

1273                     mInputPtr = ptr;

1274                     return '>';

1275                 }

1276             } else if (c == 'l') { // lt?

1277                 if (buf[ptr++] == 't' && buf[ptr++] == ';') {

1278                     mInputPtr = ptr;

1279                     return '<';

1280                 }

1281             } else if (c == 'q') { // quot?

1282                 if (buf[ptr++] == 'u' && buf[ptr++] == 'o') {

1283                     int len = mInputEnd;

1284                     if (ptr < len && buf[ptr++] == 't') {

1285                         if (ptr < len && buf[ptr++] == ';') {

1286                             mInputPtr = ptr;

1287                             return '"';

1288                         }

1289                     }

1290                 }

1291             }

1292         }

1293         return 0;

1294     }

1295 

1296     /**

1297      * Method called to resolve character entities, and only character

1298      * entities (except that pre-defined char entities -- amp, apos, lt,

1299      * gt, quote -- MAY be "char entities" in this sense, depending on

1300      * arguments).

1301      * Otherwise it is to return the null char; if so,

1302      * the input pointer will point to the same point as when method

1303      * entered (char after ampersand), plus the ampersand itself is

1304      * guaranteed to be in the input buffer (so caller can just push it

1305      * back if necessary).

1306      *<p>

1307      * Most often this method is called when reader is not to expand

1308      * non-char entities automatically, but to return them as separate

1309      * events.

1310      *<p>

1311      * Main complication here is that we need to do 5-char lookahead. This

1312      * is problematic if chars are on input buffer boundary. This is ok

1313      * for the root level input buffer, but not for some nested buffers.

1314      * However, according to XML specs, such split entities are actually

1315      * illegal... so we can throw an exception in those cases.

1316      *

1317      * @param checkStd If true, will check pre-defined internal entities

1318      *   (gt, lt, amp, apos, quot) as character entities; if false, will only

1319      *   check actual 'real' character entities.

1320      *

1321      * @return (Valid) character value, if entity is a character reference,

1322      *   and could be resolved from current input buffer (does not span

1323      *   buffer boundary); null char (code 0) if not (either non-char

1324      *   entity, or spans input buffer boundary).

1325      */

1326     protected int resolveCharOnlyEntity(boolean checkStd)

1327         throws XMLStreamException

1328     {

1329         //int avail = inputInBuffer();

1330         int avail = mInputEnd - mInputPtr;

1331         if (avail < 6) {

1332             // split entity, or buffer boundary

1333             /* Don't want to lose leading '&' (in case we can not expand

1334              * the entity), so let's push it back first

1335              */

1336             --mInputPtr;

1337             /* Shortest valid reference would be 3 chars ('&a;'); which

1338              * would only be legal from an expanded entity...

1339              */

1340             if (!ensureInput(6)) {

1341                 avail = inputInBuffer();

1342                 if (avail < 3) {

1343                     throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);

1344                 }

1345             } else {

1346                 avail = 6;

1347             }

1348             // ... and now we can move pointer back as well:

1349             ++mInputPtr;

1350         }

1351 

1352         /* Ok, now we have one more character to check, and that's enough

1353          * to determine type decisively.

1354          */

1355         char c = mInputBuffer[mInputPtr];

1356 

1357         // A char reference?

1358         if (c == '#') { // yup

1359             ++mInputPtr;

1360             return resolveCharEnt(null);

1361         }

1362 

1363         // nope... except may be a pre-def?

1364         if (checkStd) {

1365             if (c == 'a') {

1366                 char d = mInputBuffer[mInputPtr+1];

1367                 if (d == 'm') {

1368                     if (avail >= 4

1369                         && mInputBuffer[mInputPtr+2] == 'p'

1370                         && mInputBuffer[mInputPtr+3] == ';') {

1371                         mInputPtr += 4;

1372                         return '&';

1373                     }

1374                 } else if (d == 'p') {

1375                     if (avail >= 5

1376                         && mInputBuffer[mInputPtr+2] == 'o'

1377                         && mInputBuffer[mInputPtr+3] == 's'

1378                         && mInputBuffer[mInputPtr+4] == ';') {

1379                         mInputPtr += 5;

1380                         return '\'';

1381                     }

1382                 }

1383             } else if (c == 'l') {

1384                 if (avail >= 3

1385                     && mInputBuffer[mInputPtr+1] == 't'

1386                     && mInputBuffer[mInputPtr+2] == ';') {

1387                     mInputPtr += 3;

1388                     return '<';

1389                 }

1390             } else if (c == 'g') {

1391                 if (avail >= 3

1392                     && mInputBuffer[mInputPtr+1] == 't'

1393                     && mInputBuffer[mInputPtr+2] == ';') {

1394                     mInputPtr += 3;

1395                     return '>';

1396                 }

1397             } else if (c == 'q') {

1398                 if (avail >= 5

1399                     && mInputBuffer[mInputPtr+1] == 'u'

1400                     && mInputBuffer[mInputPtr+2] == 'o'

1401                     && mInputBuffer[mInputPtr+3] == 't'

1402                     && mInputBuffer[mInputPtr+4] == ';') {

1403                     mInputPtr += 5;

1404                     return '"';

1405                 }

1406             }

1407         }

1408         return 0;

1409     }

1410 

1411     /**

1412      * Reverse of {@link #resolveCharOnlyEntity}; will only resolve entity

1413      * if it is NOT a character entity (or pre-defined 'generic' entity;

1414      * amp, apos, lt, gt or quot). Only used in cases where entities

1415      * are to be separately returned unexpanded (in non-entity-replacing

1416      * mode); which means it's never called from dtd handler.

1417      */

1418     protected EntityDecl resolveNonCharEntity()

1419         throws XMLStreamException

1420     {

1421         //int avail = inputInBuffer();

1422         int avail = mInputEnd - mInputPtr;

1423         if (avail < 6) {

1424             // split entity, or buffer boundary

1425             /* Don't want to lose leading '&' (in case we can not expand

1426              * the entity), so let's push it back first

1427              */

1428             --mInputPtr;

1429 

1430             /* Shortest valid reference would be 3 chars ('&a;'); which

1431              * would only be legal from an expanded entity...

1432              */

1433             if (!ensureInput(6)) {

1434                 avail = inputInBuffer();

1435                 if (avail < 3) {

1436                     throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);

1437                 }

1438             } else {

1439                 avail = 6;

1440             }

1441             // ... and now we can move pointer back as well:

1442             ++mInputPtr;

1443         }

1444 

1445         // We don't care about char entities:

1446         char c = mInputBuffer[mInputPtr];

1447         if (c == '#') {

1448             return null;

1449         }

1450 

1451         /* 19-Aug-2004, TSa: Need special handling for pre-defined

1452          *   entities; they are not counted as 'real' general parsed

1453          *   entities, but more as character entities...

1454          */

1455 

1456         // have chars at least up to mInputPtr+4 by now

1457         if (c == 'a') {

1458             char d = mInputBuffer[mInputPtr+1];

1459             if (d == 'm') {

1460                 if (avail >= 4

1461                     && mInputBuffer[mInputPtr+2] == 'p'

1462                     && mInputBuffer[mInputPtr+3] == ';') {

1463                     // If not automatically expanding:

1464                     //return sEntityAmp;

1465                     // mInputPtr += 4;

1466                     return null;

1467                 }

1468             } else if (d == 'p') {

1469                 if (avail >= 5

1470                     && mInputBuffer[mInputPtr+2] == 'o'

1471                     && mInputBuffer[mInputPtr+3] == 's'

1472                     && mInputBuffer[mInputPtr+4] == ';') {

1473                     return null;

1474                 }

1475             }

1476         } else if (c == 'l') {

1477             if (avail >= 3

1478                 && mInputBuffer[mInputPtr+1] == 't'

1479                 && mInputBuffer[mInputPtr+2] == ';') {

1480                 return null;

1481             }

1482         } else if (c == 'g') {

1483             if (avail >= 3

1484                 && mInputBuffer[mInputPtr+1] == 't'

1485                 && mInputBuffer[mInputPtr+2] == ';') {

1486                 return null;

1487             }

1488         } else if (c == 'q') {

1489             if (avail >= 5

1490                 && mInputBuffer[mInputPtr+1] == 'u'

1491                 && mInputBuffer[mInputPtr+2] == 'o'

1492                 && mInputBuffer[mInputPtr+3] == 't'

1493                 && mInputBuffer[mInputPtr+4] == ';') {

1494                 return null;

1495             }

1496         }

1497 

1498         // Otherwise, let's just parse in generic way:

1499         ++mInputPtr; // since we already read the first letter

1500         String id = parseEntityName(c);

1501         mCurrName = id;

1502 

1503         return findEntity(id, null);

1504     }

1505 

1506     /**

1507      * Method that does full resolution of an entity reference, be it

1508      * character entity, internal entity or external entity, including

1509      * updating of input buffers, and depending on whether result is

1510      * a character entity (or one of 5 pre-defined entities), returns

1511      * char in question, or null character (code 0) to indicate it had

1512      * to change input source.

1513      *

1514      * @param allowExt If true, is allowed to expand external entities

1515      *   (expanding text); if false, is not (expanding attribute value).

1516      *

1517      * @return Either single-character replacement (which is NOT to be

1518      *    reparsed), or null char (0) to indicate expansion is done via

1519      *    input source.

1520      */

1521     protected int fullyResolveEntity(boolean allowExt)

1522         throws XMLStreamException

1523     {

1524         char c = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);

1525         // Do we have a (numeric) character entity reference?

1526         if (c == '#') { // numeric

1527             final StringBuffer originalSurface = new StringBuffer("#");

1528             int ch = resolveCharEnt(originalSurface);

1529             if (mCfgTreatCharRefsAsEntities) {

1530                 final char[] originalChars = new char[originalSurface.length()];

1531                 originalSurface.getChars(0, originalSurface.length(), originalChars, 0);

1532                 mCurrEntity = getIntEntity(ch, originalChars);

1533                 return 0;

1534             }

1535             return ch;

1536         }

1537 

1538         String id = parseEntityName(c);

1539  

1540         // Perhaps we have a pre-defined char reference?

1541         c = id.charAt(0);

1542         /*

1543          * 16-May-2004, TSa: Should custom entities (or ones defined in int/ext subset) override

1544          * pre-defined settings for these?

1545          */

1546         char d = CHAR_NULL;

1547         if (c == 'a') { // amp or apos?

1548             if (id.equals("amp")) {

1549                 d = '&';

1550             } else if (id.equals("apos")) {

1551                 d = '\'';

1552             }

1553         } else if (c == 'g') { // gt?

1554             if (id.length() == 2 && id.charAt(1) == 't') {

1555                 d = '>';

1556             }

1557         } else if (c == 'l') { // lt?

1558             if (id.length() == 2 && id.charAt(1) == 't') {

1559                 d = '<';

1560             }

1561         } else if (c == 'q') { // quot?

1562             if (id.equals("quot")) {

1563                 d = '"';

1564             }

1565         }

1566 

1567         if (d != CHAR_NULL) {

1568             if (mCfgTreatCharRefsAsEntities) {

1569                 final char[] originalChars = new char[id.length()];

1570                 id.getChars(0, id.length(), originalChars, 0);

1571                 mCurrEntity = getIntEntity(d, originalChars);

1572                 return 0;

1573             }

1574             return d;

1575         }

1576 

1577         final EntityDecl e = expandEntity(id, allowExt, null);

1578         if (mCfgTreatCharRefsAsEntities) {

1579             mCurrEntity = e;

1580         }

1581         return 0;

1582     }

1583 

1584     /**

1585      * Returns an entity (possibly from cache) for the argument character using the encoded

1586      * representation in mInputBuffer[entityStartPos ... mInputPtr-1].

1587      */

1588     protected EntityDecl getIntEntity(int ch, final char[] originalChars)

1589     {

1590         String cacheKey = new String(originalChars);

1591 

1592         IntEntity entity = mCachedEntities.get(cacheKey);

1593         if (entity == null) {

1594             String repl;

1595             if (ch <= 0xFFFF) {

1596                 repl = Character.toString((char) ch);

1597             } else {

1598                 StringBuffer sb = new StringBuffer(2);

1599                 ch -= 0x10000;

1600                 sb.append((char) ((ch >> 10)  + 0xD800));

1601                 sb.append((char) ((ch & 0x3FF)  + 0xDC00));

1602                 repl = sb.toString();

1603             }

1604             entity = IntEntity.create(new String(originalChars), repl);

1605             mCachedEntities.put(cacheKey, entity);

1606         }

1607         return entity;

1608     }

1609 

1610 

1611     /**

1612      * Helper method that will try to expand a parsed entity (parameter or

1613      * generic entity).

1614      *<p>

1615      * note: called by sub-classes (dtd parser), needs to be protected.

1616      *

1617      * @param id Name of the entity being expanded 

1618      * @param allowExt Whether external entities can be expanded or not; if

1619      *   not, and the entity to expand would be external one, an exception

1620      *   will be thrown

1621      */

1622     protected EntityDecl expandEntity(String id, boolean allowExt,

1623                                       Object extraArg)

1624         throws XMLStreamException

1625     {

1626         mCurrName = id;

1627 

1628         EntityDecl ed = findEntity(id, extraArg);

1629 

1630         if (ed == null) {

1631             /* 30-Sep-2005, TSa: As per [WSTX-5], let's only throw exception

1632              *   if we have to resolve it (otherwise it's just best-effort, 

1633              *   and null is ok)

1634              */

1635             /* 02-Oct-2005, TSa: Plus, [WSTX-4] adds "undeclared entity

1636              *    resolver"

1637              */

1638             if (mCfgReplaceEntities) {

1639                 mCurrEntity = expandUnresolvedEntity(id);

1640             }

1641             return null;

1642         }

1643         

1644         if (!mCfgTreatCharRefsAsEntities || this instanceof MinimalDTDReader) {

1645             expandEntity(ed, allowExt);

1646         }

1647         

1648         return ed;

1649     }

1650 

1651     /**

1652      *<p>

1653      * note: defined as private for documentation, ie. it's just called

1654      * from within this class (not sub-classes), from one specific method

1655      * (see above)

1656      *

1657      * @param ed Entity to be expanded

1658      * @param allowExt Whether external entities are allowed or not.

1659      */

1660     private void expandEntity(EntityDecl ed, boolean allowExt)

1661         throws XMLStreamException

1662     {

1663         String id = ed.getName();

1664 

1665         /* Very first thing; we can immediately check if expanding

1666          * this entity would result in infinite recursion:

1667          */

1668         if (mInput.isOrIsExpandedFrom(id)) {

1669             throwRecursionError(id);

1670         }

1671 

1672         /* Should not refer unparsed entities from attribute values

1673          * or text content (except via notation mechanism, but that's

1674          * not parsed here)

1675          */

1676         if (!ed.isParsed()) {

1677             throwParseError("Illegal reference to unparsed external entity \"{0}\"", id, null);

1678         }

1679 

1680         // 28-Jun-2004, TSa: Do we support external entity expansion?

1681         boolean isExt = ed.isExternal();

1682         if (isExt) {

1683             if (!allowExt) { // never ok in attribute value...

1684                 throwParseError("Encountered a reference to external parsed entity \"{0}\" when expanding attribute value: not legal as per XML 1.0/1.1 #3.1", id, null);

1685             }

1686             if (!mConfig.willSupportExternalEntities()) {

1687                 throwParseError("Encountered a reference to external entity \"{0}\", but stream reader has feature \"{1}\" disabled",

1688                                 id, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);

1689             }

1690         }

1691         verifyLimit("Maximum entity expansion count", mConfig.getMaxEntityCount(), ++mEntityExpansionCount);

1692         // First, let's give current context chance to save its stuff

1693         WstxInputSource oldInput = mInput;

1694         oldInput.saveContext(this);

1695         WstxInputSource newInput = null;

1696         try {

1697             newInput = ed.expand(oldInput, mEntityResolver, mConfig, mDocXmlVersion);

1698         } catch (FileNotFoundException fex) {

1699             /* Let's catch and rethrow this just so we get more meaningful

1700              * description (with input source position etc)

1701              */

1702             throwParseError("(was {0}) {1}", fex.getClass().getName(), fex.getMessage());

1703         } catch (IOException ioe) {

1704             throw constructFromIOE(ioe);

1705         }

1706         /* And then we'll need to make sure new input comes from the new

1707          * input source

1708          */

1709         initInputSource(newInput, isExt, id);

1710     }

1711 

1712     /**

1713      *<p>

1714      * note: only called from the local expandEntity() method

1715      */

1716     private EntityDecl expandUnresolvedEntity(String id)

1717         throws XMLStreamException

1718     {

1719         XMLResolver resolver = mConfig.getUndeclaredEntityResolver();

1720         if (resolver != null) {

1721             /* Ok, we can check for recursion here; but let's only do that

1722              * if there is any chance that it might get resolved by

1723              * the special resolver (it must have been resolved this way

1724              * earlier, too...)

1725              */

1726             if (mInput.isOrIsExpandedFrom(id)) {

1727                 throwRecursionError(id);

1728             }

1729 

1730             WstxInputSource oldInput = mInput;

1731             oldInput.saveContext(this);

1732             // null, null -> no public or system ids

1733             int xmlVersion = mDocXmlVersion;

1734             // 05-Feb-2006, TSa: If xmlVersion not explicitly known, defaults to 1.0

1735             if (xmlVersion == XmlConsts.XML_V_UNKNOWN) {

1736                 xmlVersion = XmlConsts.XML_V_10;

1737             }

1738             WstxInputSource newInput;

1739             try {

1740                 newInput = DefaultInputResolver.resolveEntityUsing

1741                     (oldInput, id, null, null, resolver, mConfig, xmlVersion);

1742                 if (mCfgTreatCharRefsAsEntities) {

1743                     return new IntEntity(WstxInputLocation.getEmptyLocation(), newInput.getEntityId(),

1744                             newInput.getSource(), new char[]{}, WstxInputLocation.getEmptyLocation());

1745                 }

1746             } catch (IOException ioe) {

1747                 throw constructFromIOE(ioe);

1748             }

1749             if (newInput != null) {

1750                 // true -> is external

1751                 initInputSource(newInput, true, id);

1752                 return null;

1753             }

1754         }

1755         handleUndeclaredEntity(id);

1756         return null;

1757     }

1758 

1759     /*

1760     ///////////////////////////////////////////////////////////////////////

1761     // Abstract methods for sub-classes to implement

1762     ///////////////////////////////////////////////////////////////////////

1763      */

1764 

1765     /**

1766      * Abstract method for sub-classes to implement, for finding

1767      * a declared general or parsed entity.

1768      *

1769      * @param id Identifier of the entity to find

1770      * @param arg Optional argument passed from caller; needed by DTD

1771      *    reader.

1772      */

1773     protected abstract EntityDecl findEntity(String id, Object arg)

1774         throws XMLStreamException;

1775 

1776     /**

1777      * This method gets called if a declaration for an entity was not

1778      * found in entity expanding mode (enabled by default for xml reader,

1779      * always enabled for dtd reader).

1780      */

1781     protected abstract void handleUndeclaredEntity(String id)

1782         throws XMLStreamException;

1783 

1784     protected abstract void handleIncompleteEntityProblem(WstxInputSource closing)

1785         throws XMLStreamException;

1786 

1787     /*

1788     ///////////////////////////////////////////////////////////////////////

1789     // Basic tokenization

1790     ///////////////////////////////////////////////////////////////////////

1791      */

1792 

1793     /**

1794      * Method that will parse name token (roughly equivalent to XML specs;

1795      * although bit lenier for more efficient handling); either uri prefix,

1796      * or local name.

1797      *<p>

1798      * Much of complexity in this method has to do with the intention to 

1799      * try to avoid any character copies. In this optimal case algorithm

1800      * would be fairly simple. However, this only works if all data is

1801      * already in input buffer... if not, copy has to be made halfway

1802      * through parsing, and that complicates things.

1803      *<p>

1804      * One thing to note is that String returned has been canonicalized

1805      * and (if necessary) added to symbol table. It can thus be compared

1806      * against other such (usually id) Strings, with simple equality operator.

1807      *

1808      * @param c First character of the name; not yet checked for validity

1809      *

1810      * @return Canonicalized name String (which may have length 0, if

1811      *    EOF or non-name-start char encountered)

1812      */

1813     protected String parseLocalName(char c)

1814         throws XMLStreamException

1815     {

1816         /* Has to start with letter, or '_' (etc); we won't allow ':' as that

1817          * is taken as namespace separator; no use trying to optimize

1818          * heavily as it's 98% likely it is a valid char...

1819          */

1820         if (!isNameStartChar(c)) {

1821             if (c == ':') {

1822                 throwUnexpectedChar(c, " (missing namespace prefix?)");

1823             }

1824             throwUnexpectedChar(c, " (expected a name start character)");

1825         }

1826 

1827         int ptr = mInputPtr;

1828         int hash = c;

1829         final int inputLen = mInputEnd;

1830         int startPtr = ptr-1; // already read previous char

1831         final char[] inputBuf = mInputBuffer;

1832 

1833         /* After which there may be zero or more name chars

1834          * we have to consider

1835          */

1836         while (true) {

1837             if (ptr >= inputLen) {

1838                 /* Ok, identifier may continue past buffer end, need

1839                  * to continue with part 2 (separate method, as this is

1840                  * not as common as having it all in buffer)

1841                  */

1842                 mInputPtr = ptr;

1843                 return parseLocalName2(startPtr, hash);

1844             }

1845             // Ok, we have the char... is it a name char?

1846             c = inputBuf[ptr];

1847             if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {

1848                 break;

1849             }

1850             if (!isNameChar(c)) {

1851                 break;

1852             }

1853             hash = (hash * 31) + c;

1854             ++ptr;

1855         }

1856         mInputPtr = ptr;

1857         return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash);

1858     }

1859 

1860     /**

1861      * Second part of name token parsing; called when name can continue

1862      * past input buffer end (so only part was read before calling this

1863      * method to read the rest).

1864      *<p>

1865      * Note that this isn't heavily optimized, on assumption it's not

1866      * called very often.

1867      */

1868     protected String parseLocalName2(int start, int hash)

1869         throws XMLStreamException

1870     {

1871         int ptr = mInputEnd - start;

1872         // Let's assume fairly short names

1873         char[] outBuf = getNameBuffer(ptr+8);

1874 

1875         if (ptr > 0) {

1876             System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);

1877         }

1878 

1879         int outLen = outBuf.length;

1880         while (true) {

1881             // note: names can not cross input block (entity) boundaries...

1882             if (mInputPtr >= mInputEnd) {

1883                 if (!loadMoreFromCurrent()) {

1884                     break;

1885                 }

1886             }

1887             char c = mInputBuffer[mInputPtr];

1888             if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {

1889                 break;

1890             }

1891             if (!isNameChar(c)) {

1892                 break;

1893             }

1894             ++mInputPtr;

1895             if (ptr >= outLen) {

1896                 mNameBuffer = outBuf = expandBy50Pct(outBuf);

1897                 outLen = outBuf.length;

1898             }

1899             outBuf[ptr++] = c;

1900             hash = (hash * 31) + c;

1901         }

1902         // Still need to canonicalize the name:

1903         return mSymbols.findSymbol(outBuf, 0, ptr, hash);

1904     }

1905 

1906     /**

1907      * Method that will parse 'full' name token; what full means depends on

1908      * whether reader is namespace aware or not. If it is, full name means

1909      * local name with no namespace prefix (PI target, entity/notation name);

1910      * if not, name can contain arbitrary number of colons. Note that

1911      * element and attribute names are NOT parsed here, so actual namespace

1912      * prefix separation can be handled properly there.

1913      *<p>

1914      * Similar to {@link #parseLocalName}, much of complexity stems from

1915      * trying to avoid copying name characters from input buffer.

1916      *<p>

1917      * Note that returned String will be canonicalized, similar to

1918      * {@link #parseLocalName}, but without separating prefix/local name.

1919       *

1920      * @return Canonicalized name String (which may have length 0, if

1921      *    EOF or non-name-start char encountered)

1922      */

1923     protected String parseFullName()

1924         throws XMLStreamException

1925     {

1926         if (mInputPtr >= mInputEnd) {

1927             loadMoreFromCurrent();

1928         }

1929         return parseFullName(mInputBuffer[mInputPtr++]);

1930     }

1931 

1932     protected String parseFullName(char c)

1933         throws XMLStreamException

1934     {

1935         // First char has special handling:

1936         if (!isNameStartChar(c)) {

1937             if (c == ':') { // no name.... generally an error:

1938                 if (mCfgNsEnabled) {

1939                     throwNsColonException(parseFNameForError());

1940                 }

1941                 // Ok, that's fine actually

1942             } else {

1943                 if (c <= CHAR_SPACE) {

1944                     throwUnexpectedChar(c, " (missing name?)");

1945                 }

1946                 throwUnexpectedChar(c, " (expected a name start character)");

1947             }

1948         }

1949 

1950         int ptr = mInputPtr;

1951         int hash = c;

1952         int inputLen = mInputEnd;

1953         int startPtr = ptr-1; // to account for the first char

1954 

1955         /* After which there may be zero or more name chars

1956          * we have to consider

1957          */

1958         while (true) {

1959             if (ptr >= inputLen) {

1960                 /* Ok, identifier may continue past buffer end, need

1961                  * to continue with part 2 (separate method, as this is

1962                  * not as common as having it all in buffer)

1963                  */

1964                 mInputPtr = ptr;

1965                 return parseFullName2(startPtr, hash);

1966             }

1967             c = mInputBuffer[ptr];

1968             if (c == ':') { // colon only allowed in non-NS mode

1969                 if (mCfgNsEnabled) {

1970                     mInputPtr = ptr;

1971                     throwNsColonException(new String(mInputBuffer, startPtr, ptr - startPtr) + parseFNameForError());

1972                 }

1973             } else {

1974                 if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {

1975                     break;

1976                 }

1977                 if (!isNameChar(c)) {

1978                     break;

1979                 }

1980             }

1981             hash = (hash * 31) + c;

1982             ++ptr;

1983         }

1984         mInputPtr = ptr;

1985         return mSymbols.findSymbol(mInputBuffer, startPtr, ptr - startPtr, hash);

1986     }

1987 

1988     @SuppressWarnings("cast")

1989     protected String parseFullName2(int start, int hash)

1990         throws XMLStreamException

1991     {

1992         int ptr = mInputEnd - start;

1993         // Let's assume fairly short names

1994         char[] outBuf = getNameBuffer(ptr+8);

1995 

1996         if (ptr > 0) {

1997             System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);

1998         }

1999 

2000         int outLen = outBuf.length;

2001         while (true) {

2002             /* 06-Sep-2004, TSa: Name tokens are not allowed to continue

2003              *   past entity expansion ranges... that is, all characters

2004              *   have to come from the same input source. Thus, let's only

2005              *   load things from same input level

2006              */

2007             if (mInputPtr >= mInputEnd) {

2008                 if (!loadMoreFromCurrent()) {

2009                     break;

2010                 }

2011             }

2012             char c = mInputBuffer[mInputPtr];

2013             if (c == ':') { // colon only allowed in non-NS mode

2014                 if (mCfgNsEnabled) {

2015                     throwNsColonException(new String(outBuf, 0, ptr) + c + parseFNameForError());

2016                 }

2017             } else if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {

2018                 break;

2019             } else if (!isNameChar(c)) {

2020                 break;

2021             }

2022             ++mInputPtr;

2023 

2024             if (ptr >= outLen) {

2025                 mNameBuffer = outBuf = expandBy50Pct(outBuf);

2026                 outLen = outBuf.length;

2027             }

2028             outBuf[ptr++] = c;

2029             hash = (hash * 31) + (int) c;

2030         }

2031 

2032         // Still need to canonicalize the name:

2033         return mSymbols.findSymbol(outBuf, 0, ptr, hash);

2034     }

2035 

2036     /**

2037      * Method called to read in full name, including unlimited number of

2038      * namespace separators (':'), for the purpose of displaying name in

2039      * an error message. Won't do any further validations, and parsing

2040      * is not optimized: main need is just to get more meaningful error

2041      * messages.

2042      */

2043     protected String parseFNameForError()

2044         throws XMLStreamException

2045     {

2046         StringBuilder sb = new StringBuilder(100);

2047         while (true) {

2048             char c;

2049 

2050             if (mInputPtr < mInputEnd) {

2051                 c = mInputBuffer[mInputPtr++];

2052             } else { // can't error here, so let's accept EOF for now:

2053                 int i = getNext();

2054                 if (i < 0) {

2055                     break;

2056                 }

2057                 c = (char) i;

2058             }

2059             if (c != ':' && !isNameChar(c)) {

2060                 --mInputPtr;

2061                 break;

2062             }

2063             sb.append(c);

2064         }

2065         return sb.toString();

2066     }

2067 

2068     protected final String parseEntityName(char c)

2069         throws XMLStreamException

2070     {

2071         String id = parseFullName(c);

2072         // Needs to be followed by a semi-colon, too.. from same input source:

2073         if (mInputPtr >= mInputEnd) {

2074             if (!loadMoreFromCurrent()) {

2075                 throwParseError("Missing semicolon after reference for entity \"{0}\"", id, null);

2076             }

2077         }

2078         c = mInputBuffer[mInputPtr++];

2079         if (c != ';') {

2080             throwUnexpectedChar(c, "; expected a semi-colon after the reference for entity '"+id+"'");

2081         }

2082         return id;

2083     }

2084     

2085     /**

2086      * Note: does not check for number of colons, amongst other things.

2087      * Main idea is to skip through what superficially seems like a valid

2088      * id, nothing more. This is only done when really skipping through

2089      * something we do not care about at all: not even whether names/ids

2090      * would be valid (for example, when ignoring internal DTD subset).

2091      *

2092      * @return Length of skipped name.

2093      */

2094     protected int skipFullName(char c)

2095         throws XMLStreamException

2096     {

2097         if (!isNameStartChar(c)) {

2098             --mInputPtr;

2099             return 0;

2100         }

2101 

2102         /* After which there may be zero or more name chars

2103          * we have to consider

2104          */

2105         int count = 1;

2106         while (true) {

2107             c = (mInputPtr < mInputEnd) ?

2108                 mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_EOF_EXP_NAME);

2109             if (c != ':' && !isNameChar(c)) {

2110                 break;

2111             }

2112             ++count;

2113         }

2114         return count;

2115     }

2116 

2117     /**

2118      * Simple parsing method that parses system ids, which are generally

2119      * used in entities (from DOCTYPE declaration to internal/external

2120      * subsets).

2121      *<p>

2122      * NOTE: returned String is not canonicalized, on assumption that

2123      * external ids may be longish, and are not shared all that often, as

2124      * they are generally just used for resolving paths, if anything.

2125      *<br>

2126      * Also note that this method is not heavily optimized, as it's not

2127      * likely to be a bottleneck for parsing.

2128      */

2129     protected final String parseSystemId(char quoteChar, boolean convertLFs,

2130                                          String errorMsg)

2131         throws XMLStreamException

2132     {

2133         char[] buf = getNameBuffer(-1);

2134         int ptr = 0;

2135 

2136         while (true) {

2137             char c = (mInputPtr < mInputEnd) ?

2138                 mInputBuffer[mInputPtr++] : getNextChar(errorMsg);

2139             if (c == quoteChar) {

2140                 break;

2141             }

2142             /* ??? 14-Jun-2004, TSa: Should we normalize linefeeds or not?

2143              *   It seems like we should, for all input... so that's the way it

2144              *   works.

2145              */

2146             if (c == '\n') {

2147                 markLF();

2148             } else if (c == '\r') {

2149                 if (peekNext() == '\n') {

2150                     ++mInputPtr;

2151                     if (!convertLFs) {

2152                         /* The only tricky thing; need to preserve 2-char LF; need to

2153                          * output one char from here, then can fall back to default:

2154                          */

2155                         if (ptr >= buf.length) {

2156                             buf = expandBy50Pct(buf);

2157                         }

2158                         buf[ptr++] = '\r';

2159                     }

2160                     c = '\n';

2161                 } else if (convertLFs) {

2162                     c = '\n';

2163                 }

2164             }

2165 

2166             // Other than that, let's just append it:

2167             if (ptr >= buf.length) {

2168                 buf = expandBy50Pct(buf);

2169             }

2170             buf[ptr++] = c;

2171         }

2172 

2173         return (ptr == 0) ? "" : new String(buf, 0, ptr);

2174     }

2175 

2176     /**

2177      * Simple parsing method that parses system ids, which are generally

2178      * used in entities (from DOCTYPE declaration to internal/external

2179      * subsets).

2180      *<p>

2181      * As per xml specs, the contents are actually normalized.

2182      *<p>

2183      * NOTE: returned String is not canonicalized, on assumption that

2184      * external ids may be longish, and are not shared all that often, as

2185      * they are generally just used for resolving paths, if anything.

2186      *<br>

2187      * Also note that this method is not heavily optimized, as it's not

2188      * likely to be a bottleneck for parsing.

2189      */

2190     protected final String parsePublicId(char quoteChar, String errorMsg)

2191         throws XMLStreamException

2192     {

2193         char[] buf = getNameBuffer(-1);

2194         int ptr = 0;

2195         boolean spaceToAdd = false;

2196 

2197         while (true) {

2198             char c = (mInputPtr < mInputEnd) ?

2199                 mInputBuffer[mInputPtr++] : getNextChar(errorMsg);

2200             if (c == quoteChar) {

2201                 break;

2202             }

2203             if (c == '\n') {

2204                 markLF();

2205                 spaceToAdd = true;

2206                 continue;

2207             } else if (c == '\r') {

2208                 if (peekNext() == '\n') {

2209                     ++mInputPtr;

2210                 }

2211                 spaceToAdd = true;

2212                 continue;

2213             } else if (c == CHAR_SPACE) {

2214                 spaceToAdd = true;

2215                 continue;

2216             } else {

2217                 // Verify it's a legal pubid char (see XML spec, #13, from 2.3)

2218                 if ((c >= VALID_PUBID_CHAR_COUNT)

2219                     || sPubidValidity[c] != PUBID_CHAR_VALID_B) {

2220                     throwUnexpectedChar(c, " in public identifier");

2221                 }

2222             }

2223         

2224             // Other than that, let's just append it:

2225             if (ptr >= buf.length) {

2226                 buf = expandBy50Pct(buf);

2227             }

2228             /* Space-normalization means scrapping leading and trailing

2229              * white space, and coalescing remaining ws into single spaces.

2230              */

2231             if (spaceToAdd) { // pending white space to add?

2232                 if (c == CHAR_SPACE) { // still a space; let's skip

2233                     continue;

2234                 }

2235                 /* ok: if we have non-space, we'll either forget about

2236                  * space(s) (if nothing has been output, ie. leading space),

2237                  * or output a single space (in-between non-white space)

2238                  */

2239                 spaceToAdd = false;

2240                 if (ptr > 0) {

2241                     buf[ptr++] = CHAR_SPACE;

2242                     if (ptr >= buf.length) {

2243                         buf = expandBy50Pct(buf);

2244                     }

2245                 }

2246             }

2247             buf[ptr++] = c;

2248         }

2249       

2250         return (ptr == 0) ? "" : new String(buf, 0, ptr);

2251     }

2252 

2253     protected final void parseUntil(TextBuffer tb, char endChar, boolean convertLFs,

2254                                     String errorMsg)

2255         throws XMLStreamException

2256     {

2257         // Let's first ensure we have some data in there...

2258         if (mInputPtr >= mInputEnd) {

2259             loadMore(errorMsg);

2260         }

2261         while (true) {

2262             // Let's loop consequtive 'easy' spans:

2263             char[] inputBuf = mInputBuffer;

2264             int inputLen = mInputEnd;

2265             int ptr = mInputPtr;

2266             int startPtr = ptr;

2267             while (ptr < inputLen) {

2268                 char c = inputBuf[ptr++];

2269                 if (c == endChar) {

2270                     int thisLen = ptr - startPtr - 1;

2271                     if (thisLen > 0) {

2272                         tb.append(inputBuf, startPtr, thisLen);

2273                     }

2274                     mInputPtr = ptr;

2275                     return;

2276                 }

2277                 if (c == '\n') {

2278                     mInputPtr = ptr; // markLF() requires this

2279                     markLF();

2280                 } else if (c == '\r') {

2281                     if (!convertLFs && ptr < inputLen) {

2282                         if (inputBuf[ptr] == '\n') {

2283                             ++ptr;

2284                         }

2285                         mInputPtr = ptr;

2286                         markLF();

2287                     } else {

2288                         int thisLen = ptr - startPtr - 1;

2289                         if (thisLen > 0) {

2290                             tb.append(inputBuf, startPtr, thisLen);

2291                         }

2292                         mInputPtr = ptr;

2293                         c = getNextChar(errorMsg);

2294                         if (c != '\n') {

2295                             --mInputPtr; // pusback

2296                             tb.append(convertLFs ? '\n' : '\r');

2297                         } else {

2298                             if (convertLFs) {

2299                                 tb.append('\n');

2300                             } else {

2301                                 tb.append('\r');

2302                                 tb.append('\n');

2303                             }

2304                         }

2305                         startPtr = ptr = mInputPtr;

2306                         markLF();

2307                     }

2308                 }

2309             }

2310             int thisLen = ptr - startPtr;

2311             if (thisLen > 0) {

2312                 tb.append(inputBuf, startPtr, thisLen);

2313             }

2314             loadMore(errorMsg);

2315             startPtr = ptr = mInputPtr;

2316             inputBuf = mInputBuffer;

2317             inputLen = mInputEnd;

2318         }

2319     }

2320 

2321     /*

2322     ///////////////////////////////////////////////////////////////////////

2323     // Internal methods

2324     ///////////////////////////////////////////////////////////////////////

2325      */

2326 

2327     private int resolveCharEnt(StringBuffer originalCharacters)

2328         throws XMLStreamException

2329     {

2330         int value = 0;

2331         char c = getNextChar(SUFFIX_IN_ENTITY_REF);

2332         

2333         if (originalCharacters != null) {

2334             originalCharacters.append(c);

2335         }

2336         

2337         if (c == 'x') { // hex

2338             while (true) {

2339                 c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

2340                     : getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);

2341                 if (c == ';') {

2342                     break;

2343                 }

2344                 

2345                 if (originalCharacters != null) {

2346                     originalCharacters.append(c);

2347                 }

2348                 value = value << 4;

2349                 if (c <= '9' && c >= '0') {

2350                     value += (c - '0');

2351                 } else if (c >= 'a' && c <= 'f') {

2352                     value += 10 + (c - 'a');

2353                 } else if (c >= 'A' && c <= 'F') {

2354                     value += 10 + (c - 'A');

2355                 } else {

2356                     throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F).");

2357                 }

2358                 // Overflow?

2359                 if (value > MAX_UNICODE_CHAR) {

2360                     reportUnicodeOverflow();

2361                 }

2362             }

2363         } else { // numeric (decimal)

2364             while (c != ';') {

2365                 if (c <= '9' && c >= '0') {

2366                     value = (value * 10) + (c - '0');

2367                     // Overflow?

2368                     if (value > MAX_UNICODE_CHAR) {

2369                         reportUnicodeOverflow();

2370                     }

2371                 } else {

2372                     throwUnexpectedChar(c, "; expected a decimal number.");

2373                 }

2374                 c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

2375                     : getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);

2376                 

2377                 if (originalCharacters != null && c != ';') {

2378                     originalCharacters.append(c);

2379                 }

2380             }

2381         }

2382         validateChar(value);

2383         return value;

2384     }

2385 

2386     /**

2387      * Method that will verify that expanded Unicode codepoint is a valid

2388      * XML content character.

2389      */

2390     private final void validateChar(int value)

2391         throws XMLStreamException

2392     {

2393         /* 24-Jan-2006, TSa: Ok, "high" Unicode chars are problematic,

2394          *   need to be reported by a surrogate pair..

2395          */

2396         if (value >= 0xD800) {

2397             if (value < 0xE000) { // no surrogates via entity expansion

2398                 reportIllegalChar(value);

2399             }

2400             if (value > 0xFFFF) {

2401                 // Within valid range at all?

2402                 if (value > MAX_UNICODE_CHAR) {

2403                     reportUnicodeOverflow();

2404                 }

2405             } else if (value >= 0xFFFE) { // 0xFFFE and 0xFFFF are illegal too

2406                 reportIllegalChar(value);

2407             }

2408             // Ok, fine as is

2409         } else if (value < 32) {

2410             if (value == 0) {

2411                 throwParseError("Invalid character reference: null character not allowed in XML content.");

2412             }

2413             // XML 1.1 allows most other chars; 1.0 does not:

2414             if (!mXml11 && !mAllowXml11EscapedCharsInXml10

2415                 && (value != 0x9 && value != 0xA && value != 0xD)) {

2416                 reportIllegalChar(value);

2417             }

2418         }

2419     }

2420 

2421     protected final char[] getNameBuffer(int minSize)

2422     {

2423         char[] buf = mNameBuffer;

2424         

2425         if (buf == null) {

2426             mNameBuffer = buf = new char[(minSize > 48) ? (minSize+16) : 64];

2427         } else if (minSize >= buf.length) { // let's allow one char extra...

2428             int len = buf.length;

2429             len += (len >> 1); // grow by 50%

2430             mNameBuffer = buf = new char[(minSize >= len) ? (minSize+16) : len];

2431         }

2432         return buf;

2433     }

2434     

2435     protected final char[] expandBy50Pct(char[] buf)

2436     {

2437         int len = buf.length;

2438         char[] newBuf = new char[len + (len >> 1)];

2439         System.arraycopy(buf, 0, newBuf, 0, len);

2440         return newBuf;

2441     }

2442 

2443     /**

2444      * Method called to throw an exception indicating that a name that

2445      * should not be namespace-qualified (PI target, entity/notation name)

2446      * is one, and reader is namespace aware.

2447      */

2448     private void throwNsColonException(String name)

2449         throws XMLStreamException

2450     {

2451         throwParseError("Illegal name \"{0}\" (PI target, entity/notation name): can not contain a colon (XML Namespaces 1.0#6)", name, null);

2452     }

2453 

2454     private void throwRecursionError(String entityName)

2455         throws XMLStreamException

2456     {

2457         throwParseError("Illegal entity expansion: entity \"{0}\" expands itself recursively.", entityName, null);

2458     }

2459 

2460     private void reportUnicodeOverflow()

2461         throws XMLStreamException

2462     {

2463         throwParseError("Illegal character entity: value higher than max allowed (0x{0})", Integer.toHexString(MAX_UNICODE_CHAR), null);

2464     }

2465 

2466     private void reportIllegalChar(int value)

2467         throws XMLStreamException

2468     {

2469         throwParseError("Illegal character entity: expansion character (code 0x{0}", Integer.toHexString(value), null);

2470     }

2471 

2472     protected void verifyLimit(String type, long maxValue, long currentValue)

2473         throws XMLStreamException

2474     {

2475         if (currentValue > maxValue) {

2476             throw constructLimitViolation(type, maxValue);

2477         }

2478     }

2479 

2480     protected XMLStreamException constructLimitViolation(String type, long limit)

2481         throws XMLStreamException

2482     {

2483         return new XMLStreamException(type+" limit ("+limit+") exceeded");

2484     }

2485 }

2486