Monitoring JavaMelody on _ip-10-0-10-247.ec2.internal

1 /* Woodstox XML processor

2  *

3  * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi

4  *

5  * Licensed under the License specified in file LICENSE, included with

6  * the source code.

7  * You may not use this file except in compliance with the License.

8  *

9  * Unless required by applicable law or agreed to in writing, software

10  * distributed under the License is distributed on an "AS IS" BASIS,

11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12  * See the License for the specific language governing permissions and

13  * limitations under the License.

14  */

15 

16 package com.ctc.wstx.sr;

17 

18 import java.io.*;

19 import java.text.MessageFormat;

20 import java.util.Map;

21 

22 import org.xml.sax.Attributes;

23 import org.xml.sax.ContentHandler;

24 import org.xml.sax.SAXException;

25 import org.xml.sax.ext.LexicalHandler;

26 

27 import javax.xml.namespace.NamespaceContext;

28 import javax.xml.namespace.QName;

29 import javax.xml.stream.Location;

30 import javax.xml.stream.XMLStreamException;

31 

32 import org.codehaus.stax2.AttributeInfo;

33 import org.codehaus.stax2.DTDInfo;

34 import org.codehaus.stax2.LocationInfo;

35 import org.codehaus.stax2.XMLStreamLocation2;

36 import org.codehaus.stax2.XMLStreamReader2;

37 import org.codehaus.stax2.typed.TypedXMLStreamException;

38 import org.codehaus.stax2.validation.*;

39 

40 import com.ctc.wstx.api.ReaderConfig;

41 import com.ctc.wstx.api.WstxInputProperties;

42 import com.ctc.wstx.cfg.ErrorConsts;

43 import com.ctc.wstx.cfg.XmlConsts;

44 import com.ctc.wstx.dtd.MinimalDTDReader;

45 import com.ctc.wstx.ent.EntityDecl;

46 import com.ctc.wstx.exc.WstxException;

47 import com.ctc.wstx.io.*;

48 import com.ctc.wstx.util.DefaultXmlSymbolTable;

49 import com.ctc.wstx.util.ExceptionUtil;

50 import com.ctc.wstx.util.TextBuffer;

51 import com.ctc.wstx.util.TextBuilder;

52 

53 /**

54  * Partial implementation of {@link XMLStreamReader2} consisting of

55  * all functionality other than DTD-validation-specific parts, and

56  * Typed Access API (Stax2 v3.0), which are implemented at

57  * sub-classes.

58  *

59  * @author Tatu Saloranta

60  */

61 public abstract class BasicStreamReader

62     extends StreamScanner

63     implements StreamReaderImpl, DTDInfo, LocationInfo

64 {

65     /*

66     ///////////////////////////////////////////////////////////////////////

67     // Constants

68     ///////////////////////////////////////////////////////////////////////

69      */

70 

71     // // // Standalone values:

72 

73     final static int DOC_STANDALONE_UNKNOWN = 0;

74     final static int DOC_STANDALONE_YES = 1;

75     final static int DOC_STANDALONE_NO = 2;

76 

77     // // // Main state consts:

78 

79     final static int STATE_PROLOG = 0; // Before root element

80     final static int STATE_TREE = 1; // Parsing actual XML tree

81     final static int STATE_EPILOG = 2; // After root element has been closed

82     final static int STATE_MULTIDOC_HACK = 3; // State "between" multiple documents (in multi-doc mode)

83     final static int STATE_CLOSED = 4; // After reader has been closed

84 

85     // // // Tokenization state consts:

86 

87     // no idea as to what comes next (unknown type):

88     final static int TOKEN_NOT_STARTED = 0;

89 

90     // token type figured out, but not long enough:

91     final static int TOKEN_STARTED = 1;

92 

93     /* minimum token length returnable achieved; only used for

94      * CHARACTERS event which allow fragments to be returned (and for

95      * CDATA in some limited cases)

96      */

97     final static int TOKEN_PARTIAL_SINGLE = 2;

98 

99     /* a single physical event has been successfully tokenized; as with

100      * partial, only used with CDATA and CHARACTERS (meaningless for others,

101      * which should only use TOKEN_FULL_COALESCED, TOKEN_NOT_STARTED or

102      * TOKEN_STARTED).

103      */

104     final static int TOKEN_FULL_SINGLE = 3;

105 

106     /* all adjacent (text) events have been tokenized and coalesced (for

107      * CDATA and CHARACTERS), or that the full event has been parsed (for

108      * others)

109      */

110     final static int TOKEN_FULL_COALESCED = 4;

111 

112     // // // Bit masks used for quick type comparisons

113 

114     /**

115      * This mask covers all types for which basic {@link #getText} method

116      * can be called.

117      */

118     final protected static int MASK_GET_TEXT = 

119         (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)

120         | (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE);

121 

122     /**

123      * This mask covers all types for which extended <code>getTextXxx</code>

124      * methods can be called; which is less than those for which 

125      * {@link #getText} can be called. Specifically, <code>DTD</code> and

126      * <code>ENTITY_REFERENCE</code> types do not support these extended methods.

127      */

128     final protected static int MASK_GET_TEXT_XXX =

129         (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE) | (1 << COMMENT);

130 

131     /**

132      * This mask is used with Stax2 getText() method (one that takes

133      * Writer as an argument): accepts even wider range of event types.

134      */

135     final protected static int MASK_GET_TEXT_WITH_WRITER = 

136         (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)

137         | (1 << COMMENT) | (1 << DTD) | (1 << ENTITY_REFERENCE)

138         | (1 << PROCESSING_INSTRUCTION);

139 

140     final protected static int MASK_GET_ELEMENT_TEXT = 

141         (1 << CHARACTERS) | (1 << CDATA) | (1 << SPACE)

142         | (1 << ENTITY_REFERENCE);

143 

144 

145     // // // Indicator of type of text in text event (WRT white space)

146 

147     final static int ALL_WS_UNKNOWN = 0x0000;

148     final static int ALL_WS_YES = 0x0001;

149     final static int ALL_WS_NO = 0x0002;

150 

151     /* 2 magic constants used for enabling/disabling indentation checks:

152      * (to minimize negative impact for both small docs, and large

153      * docs with non-regular white space)

154      */

155 

156     private final static int INDENT_CHECK_START = 16;

157 

158     private final static int INDENT_CHECK_MAX = 40;

159 

160     // // // Shared namespace symbols

161 

162     final protected static String sPrefixXml = DefaultXmlSymbolTable.getXmlSymbol();

163 

164     final protected static String sPrefixXmlns = DefaultXmlSymbolTable.getXmlnsSymbol();

165 

166     /*

167     ///////////////////////////////////////////////////////////////////////

168     // Configuration

169     ///////////////////////////////////////////////////////////////////////

170      */

171 

172     // note: mConfig defined in base class

173 

174     /**

175      * Set of locally stored configuration flags

176      */

177     protected final int mConfigFlags;

178 

179     // // // Various extracted settings:

180 

181     protected final boolean mCfgCoalesceText;

182 

183     protected final boolean mCfgReportTextAsChars;

184     protected final boolean mCfgLazyParsing;

185 

186     /**

187      * Minimum number of characters parser can return as partial text

188      * segment, IF it's not required to coalesce adjacent text

189      * segments.

190      */

191     protected final int mShortestTextSegment;

192 

193     /*

194     ///////////////////////////////////////////////////////////////////////

195     // Symbol handling

196     ///////////////////////////////////////////////////////////////////////

197      */

198 

199     /**

200      * Object to notify about shared stuff, such as symbol tables, as well

201      * as to query for additional config settings if necessary.

202      */

203     final protected ReaderCreator mOwner;

204 

205     /*

206     ///////////////////////////////////////////////////////////////////////

207     // Additional XML document information, in addition to what StreamScanner has

208     ///////////////////////////////////////////////////////////////////////

209      */

210 

211     /**

212      * Status about "stand-aloneness" of document; set to 'yes'/'no'/'unknown'

213      * based on whether there was xml declaration, and if so, whether

214      * it had standalone attribute.

215      */

216     protected int mDocStandalone = DOC_STANDALONE_UNKNOWN;

217 

218     /*

219     ///////////////////////////////////////////////////////////////////////

220     // DOCTYPE information from document type declaration (if any found)

221     ///////////////////////////////////////////////////////////////////////

222      */

223 

224     /**

225      * Prefix of root element, as dictated by DOCTYPE declaration; null

226      * if no DOCTYPE declaration, or no root prefix

227      */

228     protected String mRootPrefix;

229 

230     /**

231      * Local name of root element, as dictated by DOCTYPE declaration; null

232      * if no DOCTYPE declaration.

233      */

234     protected String mRootLName;

235 

236     /**

237      * Public id of the DTD, if one exists and has been parsed.

238      */

239     protected String mDtdPublicId;

240 

241     /**

242      * System id of the DTD, if one exists and has been parsed.

243      */

244     protected String mDtdSystemId;

245 

246     /*

247     ///////////////////////////////////////////////////////////////////////

248     // Information about currently open subtree, content

249     ///////////////////////////////////////////////////////////////////////

250      */

251 

252     /**

253      * TextBuffer mostly used to collect non-element textual content

254      * (text, CDATA, comment content, pi data)

255      */

256     final protected TextBuffer mTextBuffer;

257 

258     /**

259      * Currently open element tree

260      */

261     final protected InputElementStack mElementStack;

262 

263     /**

264      * Object that stores information about currently accessible attributes.

265      */

266     final protected AttributeCollector mAttrCollector;

267 

268     /*

269     ///////////////////////////////////////////////////////////////////////

270     // Tokenization state

271     ///////////////////////////////////////////////////////////////////////

272      */

273 

274     /// Flag set when DOCTYPE declaration has been parsed

275     protected boolean mStDoctypeFound = false;

276 

277     /**

278      * State of the current token; one of the TOKEN_xxx constants from above.

279      *<p>

280      * Initially set to fully tokenized, since it's the virtual

281      * START_DOCUMENT event that we fully know by now (parsed by

282      * bootstrapper)

283      */

284     protected int mTokenState = TOKEN_FULL_COALESCED;

285 

286     /**

287      * Threshold value that defines tokenization state that needs to be

288      * achieved to "finish" current <b>logical</b> text segment (which

289      * may consist of adjacent CDATA and text segments; or be a complete

290      * physical segment; or just even a fragment of such a segment)

291      */

292     protected final int mStTextThreshold;

293 

294     /**

295      * Length of the current text for CDATA, CHARACTERS, WHITESPACE.

296      * When segmenting, this records the size of all the segments

297      * so we can track if the text length has exceeded limits.

298      */

299     protected int mCurrTextLength;

300 

301     /// Flag that indicates current start element is an empty element

302     protected boolean mStEmptyElem = false;

303 

304     /**

305      * Main parsing/tokenization state (STATE_xxx)

306      */

307     protected int mParseState;

308 

309     /**

310      * Current state of the stream, ie token value returned by

311      * {@link #getEventType}. Needs to be initialized to START_DOCUMENT,

312      * since that's the state it starts in.

313      */

314     protected int mCurrToken = START_DOCUMENT;

315 

316     /**

317      * Additional information sometimes stored (when generating dummy

318      * events in multi-doc mode, for example) temporarily when

319      * {@link #mCurrToken} is already populated.

320      */

321     protected int mSecondaryToken = START_DOCUMENT;

322 

323     /**

324      * Status of current (text) token's "whitespaceness", that is,

325      * whether it is or is not all white space.

326      */

327     protected int mWsStatus;

328 

329     /**

330      * Flag that indicates that textual content (CDATA, CHARACTERS) is to

331      * be validated within current element's scope. Enabled if one of

332      * validators returns {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT},

333      * and will prevent lazy parsing of text.

334      */

335     protected boolean mValidateText = false;

336 

337     /**

338      * Counter used for determining whether we are to try to heuristically

339      * "intern" white space that seems to be used for indentation purposes

340      */

341     protected int mCheckIndentation;

342 

343     /**

344      * Due to the way Stax API does not allow throwing stream exceptions

345      * from many methods for which Woodstox would need to throw one

346      * (especially <code>getText</code> and its variations), we may need

347      * to delay throwing an exception until {@link #next} is called next

348      * time. If so, this variable holds the pending stream exception.

349      */

350     protected XMLStreamException mPendingException = null;

351 

352     /*

353     ///////////////////////////////////////////////////////////////////////

354     // DTD information (entities, content spec stub)

355     ///////////////////////////////////////////////////////////////////////

356      */

357 

358     /**

359      * Entities parsed from internal/external DTD subsets. Although it

360      * will remain null for this class, extended classes make use of it,

361      * plus, to be able to share some of entity resolution code, instance

362      * is left here even though it semantically belongs to the sub-class.

363      */

364     protected Map<String, EntityDecl> mGeneralEntities = null;

365 

366     /**

367      * Mode information needed at this level; mostly to check what kind

368      * of textual content (if any) is allowed in current element

369      * context. Constants come from

370      * {@link XMLValidator},

371      * (like {@link XMLValidator#CONTENT_ALLOW_VALIDATABLE_TEXT}).

372      * Only used inside tree; ignored for prolog/epilog (which

373      * have straight-forward static rules).

374      */

375     protected int mVldContent = XMLValidator.CONTENT_ALLOW_ANY_TEXT;

376     

377     /**

378      * Configuration from {@code WstxInputProperties#RETURN_NULL_FOR_DEFAULT_NAMESPACE}

379      * 

380      * @since 4.1.2

381      */

382     protected boolean mReturnNullForDefaultNamespace;

383 

384     /*

385     ///////////////////////////////////////////////////////////////////////

386     // Instance construction, initialization

387     ///////////////////////////////////////////////////////////////////////

388      */

389 

390     /**

391      * @param elemStack Input element stack to use; if null, will create

392      *   instance locally.

393      * @param forER Override indicator; if true, this stream reader will be

394      *   used by an event reader, and should modify some of the base config

395      *   settings appropriately. If false, configuration settings are to

396      *   be used as is.

397      */

398     protected BasicStreamReader(InputBootstrapper bs,

399                                 BranchingReaderSource input, ReaderCreator owner,

400                                 ReaderConfig cfg, InputElementStack elemStack,

401                                 boolean forER)

402         throws XMLStreamException

403     {

404         super(input, cfg, cfg.getEntityResolver());

405 

406         mOwner = owner;

407 

408         mTextBuffer = TextBuffer.createRecyclableBuffer(cfg);

409 

410         // // // First, configuration settings:

411 

412         mConfigFlags = cfg.getConfigFlags();

413         mCfgCoalesceText = (mConfigFlags & CFG_COALESCE_TEXT) != 0;

414         mCfgReportTextAsChars = (mConfigFlags & CFG_REPORT_CDATA) == 0;

415         mXml11 = cfg.isXml11();

416 

417         // Can only use canonical white space if we are normalizing lfs

418         mCheckIndentation = mNormalizeLFs ? 16 : 0;

419 

420         /* 30-Sep-2005, TSa: Let's not do lazy parsing when access is via

421          *   Event API. Reason is that there will be no performance benefit

422          *   (event objects always access full info right after traversal),

423          *   but the wrapping of stream exceptions within runtime exception

424          *   wrappers would happen, which is inconvenient (loss of stack trace,

425          *   not catching all exceptions as expected)

426          */

427         mCfgLazyParsing = !forER && ((mConfigFlags & CFG_LAZY_PARSING) != 0);

428 

429         /* There are a few derived settings used during tokenization that

430          * need to be initialized now...

431          */

432         if (mCfgCoalesceText) {

433             mStTextThreshold =  TOKEN_FULL_COALESCED;

434             mShortestTextSegment = Integer.MAX_VALUE;

435         } else {

436             mStTextThreshold =  TOKEN_PARTIAL_SINGLE;

437             if (forER) {

438                 /* 30-Sep-2005, TSa: No point in returning runt segments for event readers

439                  *   (due to event object overhead, less convenient); let's just force

440                  *   returning of full length segments.

441                  */

442                 mShortestTextSegment = Integer.MAX_VALUE;

443             } else {

444                 mShortestTextSegment = cfg.getShortestReportedTextSegment();

445             }

446         }

447 

448         // // // Then handling of xml declaration data:

449 

450         mDocXmlVersion = bs.getDeclaredVersion();

451         mDocInputEncoding = bs.getInputEncoding();

452         mDocXmlEncoding = bs.getDeclaredEncoding();

453 

454         String sa = bs.getStandalone();

455         if (sa == null) {

456             mDocStandalone = DOC_STANDALONE_UNKNOWN;

457         } else {

458             if (XmlConsts.XML_SA_YES.equals(sa)) {

459                 mDocStandalone = DOC_STANDALONE_YES;

460             } else {

461                 mDocStandalone = DOC_STANDALONE_NO;

462             }

463         }

464 

465         /* Ok; either we got declaration or not, but in either case we can

466          * now initialize prolog parsing settings, without having to really

467          * parse anything more.

468          */

469         /* 07-Oct-2005, TSa: Except, if we are in fragment mode, in which

470          *   case we are kind of "in tree" mode...

471          */

472         mParseState = mConfig.inputParsingModeFragment() ?

473             STATE_TREE : STATE_PROLOG;

474 

475         // // // And then connecting element stack and attribute collector

476 

477         mElementStack = elemStack;

478         mAttrCollector = elemStack.getAttrCollector();

479 

480         // And finally, location information may have offsets:

481         input.initInputLocation(this, mCurrDepth, 0);

482 

483         elemStack.connectReporter(this);

484         mReturnNullForDefaultNamespace = mConfig.returnNullForDefaultNamespace();

485     }

486 

487     protected static InputElementStack createElementStack(ReaderConfig cfg)

488     {

489         return new InputElementStack(cfg, cfg.willSupportNamespaces());

490     }

491 

492     /*

493     ///////////////////////////////////////////////////////////////////////

494     // XMLStreamReader, document info

495     ///////////////////////////////////////////////////////////////////////

496      */

497 

498     /**

499      * As per Stax (1.0) specs, needs to return whatever xml declaration

500      * claimed encoding is, if any; or null if no xml declaration found.

501      *<p>

502      * Note: method name is rather confusing (compare to {@link #getEncoding}).

503      */

504     @Override

505     public String getCharacterEncodingScheme() {

506         return mDocXmlEncoding;

507     }

508 

509     /**

510      * As per Stax (1.0) specs, needs to return whatever parser determined

511      * the encoding was, if it was able to figure it out. If not (there are

512      * cases where this can not be found; specifically when being passed a

513      * {@link Reader}), it should return null.

514      */

515     @Override

516     public String getEncoding() {

517         return mDocInputEncoding;

518     }

519 

520     @Override

521     public String getVersion()

522     {

523         if (mDocXmlVersion == XmlConsts.XML_V_10) {

524             return XmlConsts.XML_V_10_STR;

525         }

526         if (mDocXmlVersion == XmlConsts.XML_V_11) {

527             return XmlConsts.XML_V_11_STR;

528         }

529         return null; // unknown

530     }

531 

532     @Override

533     public boolean isStandalone() {

534         return mDocStandalone == DOC_STANDALONE_YES;

535     }

536 

537     @Override

538     public boolean standaloneSet() {

539         return mDocStandalone != DOC_STANDALONE_UNKNOWN;

540     }

541 

542     /*

543     ///////////////////////////////////////////////////////////////////////

544     // Public API, configuration

545     ///////////////////////////////////////////////////////////////////////

546      */

547 

548     @Override

549     public Object getProperty(String name)

550     {

551         /* 18-Nov-2008, TSa: As per [WSTX-50], should report the

552          *   actual Base URL. It can be overridden by matching

553          *   setProperty, but if not, is set to actual source

554          *   of content being parsed.

555          */

556         if (WstxInputProperties.P_BASE_URL.equals(name)) {

557             try {

558                 return mInput.getSource();

559             } catch (IOException e) { // not optimal but...

560                 throw new IllegalStateException(e);

561             }

562         }

563         /* 23-Apr-2008, TSa: Let's NOT throw IllegalArgumentException

564          *   for unknown property; JavaDocs do not suggest it needs

565          *   to be done (different from that of XMLInputFactory

566          *   and XMLStreamWriter specification)

567          */

568         return mConfig.safeGetProperty(name);

569     }

570 

571     /*

572     ///////////////////////////////////////////////////////////////////////

573     // XMLStreamReader, current state

574     ///////////////////////////////////////////////////////////////////////

575      */

576 

577     // // // Attribute access:

578 

579     @Override

580     public int getAttributeCount() {

581         if (mCurrToken != START_ELEMENT) {

582             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

583         }

584         return mAttrCollector.getCount();

585     }

586 

587     @Override

588     public String getAttributeLocalName(int index) {

589         if (mCurrToken != START_ELEMENT) {

590             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

591         }

592         return mAttrCollector.getLocalName(index);

593     }

594 

595     @Override

596     public QName getAttributeName(int index) {

597         if (mCurrToken != START_ELEMENT) {

598             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

599         }

600         return mAttrCollector.getQName(index);

601     }

602 

603     @Override

604     public String getAttributeNamespace(int index) {

605         if (mCurrToken != START_ELEMENT) {

606             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

607         }

608         // Internally it's marked as null, externally need to see ""

609         String uri = mAttrCollector.getURI(index);

610         return (uri == null) ? XmlConsts.ATTR_NO_NS_URI : uri;

611     }

612 

613     @Override

614     public String getAttributePrefix(int index) {

615         if (mCurrToken != START_ELEMENT) {

616             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

617         }

618         // Internally it's marked as null, externally need to see ""

619         String p = mAttrCollector.getPrefix(index);

620         return (p == null) ? XmlConsts.ATTR_NO_PREFIX : p;

621     }

622 

623     @Override

624     public String getAttributeType(int index) {

625         if (mCurrToken != START_ELEMENT) {

626             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

627         }

628         // Attr. collector doesn't know it, elem stack does:

629         return mElementStack.getAttributeType(index);

630     }

631 

632     @Override

633     public String getAttributeValue(int index) {

634         if (mCurrToken != START_ELEMENT) {

635             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

636         }

637         return mAttrCollector.getValue(index);

638     }

639 

640     @Override

641     public String getAttributeValue(String nsURI, String localName) {

642         if (mCurrToken != START_ELEMENT) {

643             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

644         }

645         // 22-Aug-2018, tatu: As per [woodstox-core#53], need different logic

646         //   for `null` namespace URI argument

647         if (nsURI == null) {

648             return mAttrCollector.getValueByLocalName(localName);

649         }

650         return mAttrCollector.getValue(nsURI, localName);

651     }

652 

653     /**

654      * From StAX specs:

655      *<blockquote>

656      * Reads the content of a text-only element, an exception is thrown if

657      * this is not a text-only element.

658      * Regardless of value of javax.xml.stream.isCoalescing this method always

659      * returns coalesced content.

660      *<br>Precondition: the current event is START_ELEMENT.

661      *<br>Postcondition: the current event is the corresponding END_ELEMENT. 

662      *</blockquote>

663      */

664     @Override

665     public String getElementText()

666         throws XMLStreamException

667     {

668         if (mCurrToken != START_ELEMENT) {

669              throwParseError(ErrorConsts.ERR_STATE_NOT_STELEM, null, null);

670         }

671         /* Ok, now: with START_ELEMENT we know that it's not partially

672          * processed; that we are in-tree (not prolog or epilog).

673          * The only possible complication would be:

674          */

675         if (mStEmptyElem) {

676             /* And if so, we'll then get 'virtual' close tag; things

677              * are simple as location info was set when dealing with

678              * empty start element; and likewise, validation (if any)

679              * has been taken care of

680              */

681             mStEmptyElem = false;

682             mCurrToken = END_ELEMENT;

683             return "";

684         }

685 

686         // First need to find a textual event

687         while (true) {

688             int type = next();

689             if (type == END_ELEMENT) {

690                 return "";

691             }

692             if (type == COMMENT || type == PROCESSING_INSTRUCTION) {

693                 continue;

694             }

695             if (((1 << type) & MASK_GET_ELEMENT_TEXT) == 0) {

696                 throw _constructUnexpectedInTyped(type);

697             }

698             break;

699         }

700 

701         if (mTokenState < TOKEN_FULL_COALESCED) {

702             readCoalescedText(mCurrToken, false);

703         }

704 

705         /* Ok: then a quick check; if it looks like we are directly

706          * followed by the end tag, we need not construct String

707          * quite yet.

708          */

709         if ((mInputPtr + 1) < mInputEnd &&

710             mInputBuffer[mInputPtr] == '<' && mInputBuffer[mInputPtr+1] == '/') {

711             // Note: next() has validated text, no need for more validation

712             mInputPtr += 2;

713             mCurrToken = END_ELEMENT;

714             // must first get text, as call to readEndElem may break it:

715             String result = mTextBuffer.contentsAsString();

716             // Can by-pass next(), nextFromTree(), in this case:

717             readEndElem();

718             // and then return results

719             return result;

720         }

721 

722         // Otherwise, we'll need to do slower processing

723         int extra = 1 + (mTextBuffer.size() >> 1); // let's add 50% space

724         StringBuilder sb = mTextBuffer.contentsAsStringBuilder(extra);

725         int type;

726 

727         while ((type = next()) != END_ELEMENT) {

728             if (((1 << type) & MASK_GET_ELEMENT_TEXT) != 0) {

729                 if (mTokenState < mStTextThreshold) {

730                     finishToken(false);

731                 }

732                 verifyLimit("Text size", mConfig.getMaxTextLength(), sb.length());

733                 mTextBuffer.contentsToStringBuilder(sb);

734                 continue;

735             }

736             if (type != COMMENT && type != PROCESSING_INSTRUCTION) {

737                 throw _constructUnexpectedInTyped(type);

738             }

739         }

740         // Note: calls next() have validated text, no need for more validation

741         return sb.toString();

742     }

743 

744     /**

745      * Returns type of the last event returned; or START_DOCUMENT before

746      * any events has been explicitly returned.

747      */

748     @Override

749     public int getEventType()

750     {

751         /* Only complication -- multi-part coalesced text is to be reported

752          * as CHARACTERS always, never as CDATA (StAX specs).

753          */

754         if (mCurrToken == CDATA) {

755             if (mCfgCoalesceText || mCfgReportTextAsChars) {

756                 return CHARACTERS;

757             }

758         }

759         return mCurrToken;

760     }

761 

762     @Override

763     public String getLocalName()

764     {

765         // Note: for this we need not (yet) finish reading element

766         if (mCurrToken == START_ELEMENT || mCurrToken == END_ELEMENT) {

767             return mElementStack.getLocalName();

768         }

769         if (mCurrToken == ENTITY_REFERENCE) {

770             /* 30-Sep-2005, TSa: Entity will be null in non-expanding mode

771              *   if no definition was found:

772              */

773             return (mCurrEntity == null) ? mCurrName: mCurrEntity.getName();

774         }

775         throw new IllegalStateException("Current state not START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE");

776     }

777 

778     // // // getLocation() defined in StreamScanner

779 

780     @Override

781     public QName getName()

782     {

783         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

784             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

785         }

786         return mElementStack.getCurrentElementName();

787     }

788 

789     // // // Namespace access

790 

791     @Override

792     public NamespaceContext getNamespaceContext() {

793         /* Unlike other getNamespaceXxx methods, this is available

794          * for all events.

795          * Note that the context is "live", ie. remains active (but not

796          * static) even through calls to next(). StAX compliant apps

797          * should not count on this behaviour, however.         

798          */

799         return mElementStack;

800     }

801 

802     @Override

803     public int getNamespaceCount() {

804         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

805             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

806         }

807         return mElementStack.getCurrentNsCount();

808     }

809 

810     @Override

811     public String getNamespacePrefix(int index) {

812         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

813             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

814         }

815         // Internally it's marked as null, externally need to see "" or null, depending

816         String p = mElementStack.getLocalNsPrefix(index);

817         if (p == null) {

818             return mReturnNullForDefaultNamespace ? null : XmlConsts.ATTR_NO_PREFIX;

819         }

820         return p;

821     }

822 

823     @Override

824     public String getNamespaceURI() {

825         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

826             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

827         }

828         // Internally it's marked as null, externally need to see ""

829         String uri = mElementStack.getNsURI();

830         return (uri == null) ? XmlConsts.ELEM_NO_NS_URI : uri;

831     }

832 

833     @Override

834     public String getNamespaceURI(int index)

835     {

836         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

837             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

838         }

839         // Internally it's marked as null, externally need to see ""

840         String uri = mElementStack.getLocalNsURI(index);

841         return (uri == null) ? XmlConsts.ATTR_NO_NS_URI : uri;

842     }

843 

844     @Override

845     public String getNamespaceURI(String prefix)

846     {

847         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

848             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

849         }

850         /* Note: this will need to return null if no URI found for

851          * the prefix, so we can't mask it.

852          */

853         return mElementStack.getNamespaceURI(prefix);

854     }

855 

856     @Override

857     public String getPIData() {

858         if (mCurrToken != PROCESSING_INSTRUCTION) {

859             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI);

860         }

861         if (mTokenState <= TOKEN_STARTED) {

862             safeFinishToken();

863         }

864         return mTextBuffer.contentsAsString();

865     }

866 

867     @Override

868     public String getPITarget() {

869         if (mCurrToken != PROCESSING_INSTRUCTION) {

870             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_PI);

871         }

872         // Target is always parsed automatically, not lazily...

873         return mCurrName;

874     }

875 

876     @Override

877     public String getPrefix() {

878         if (mCurrToken != START_ELEMENT && mCurrToken != END_ELEMENT) {

879             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_ELEM);

880         }

881         // Internally it's marked as null, externally need to see ""

882         String p = mElementStack.getPrefix();

883         return (p == null) ? XmlConsts.ELEM_NO_PREFIX : p;

884     }

885 

886     @Override

887     public String getText()

888     {

889         final int currToken = mCurrToken;

890         if (((1 << currToken) & MASK_GET_TEXT) == 0) {

891             throwNotTextual(currToken);

892         }

893         if (mTokenState < mStTextThreshold) {

894             safeFinishToken();

895         }

896         if (currToken == ENTITY_REFERENCE) {

897             return (mCurrEntity == null) ? null : mCurrEntity.getReplacementText();

898         }

899         if (currToken == DTD) {

900             // 16-Aug-2004, TSa: Hmmh. Specs are bit ambiguous on whether this

901             //   should return just the internal subset, or the whole thing...

902             return getDTDInternalSubset();

903         }

904         return mTextBuffer.contentsAsString();

905     }

906 

907     @Override

908     public char[] getTextCharacters()

909     {

910         final int currToken = mCurrToken;

911         if (((1 << currToken) & MASK_GET_TEXT_XXX) == 0) {

912             throwNotTextXxx(currToken);

913         }

914         if (mTokenState < mStTextThreshold) {

915             safeFinishToken();

916         }

917         if (currToken == ENTITY_REFERENCE) {

918             return mCurrEntity.getReplacementChars();

919         }

920         if (currToken == DTD) {

921             return getDTDInternalSubsetArray();

922         }

923         return mTextBuffer.getTextBuffer();

924     }

925 

926     @Override

927     public int getTextCharacters(int sourceStart, char[] target, int targetStart, int len)

928     {

929         final int currToken = mCurrToken;

930         if (((1 << currToken) & MASK_GET_TEXT_XXX) == 0) {

931             throwNotTextXxx(currToken);

932         }

933         if (mTokenState < mStTextThreshold) {

934             safeFinishToken();

935         }

936         return mTextBuffer.contentsToArray(sourceStart, target, targetStart, len);

937     }

938 

939     @Override

940     public int getTextLength()

941     {

942         final int currToken = mCurrToken;

943         if (((1 << currToken) & MASK_GET_TEXT_XXX) == 0) {

944             throwNotTextXxx(currToken);

945         }

946         if (mTokenState < mStTextThreshold) {

947             safeFinishToken();

948         }

949         return mTextBuffer.size();

950     }

951 

952     @Override

953     public int getTextStart()

954     {

955         final int currToken = mCurrToken;

956         if (((1 << currToken) & MASK_GET_TEXT_XXX) == 0) {

957             throwNotTextXxx(currToken);

958         }

959         if (mTokenState < mStTextThreshold) {

960             safeFinishToken();

961         }

962         return mTextBuffer.getTextStart();

963     }

964 

965     @Override

966     public boolean hasName() {

967         return (mCurrToken == START_ELEMENT) || (mCurrToken == END_ELEMENT);

968     }

969 

970     @Override

971     public boolean hasNext() {

972         // 08-Oct-2005, TSa: In multi-doc mode, we have different criteria...

973         return (mCurrToken != END_DOCUMENT)

974             || (mParseState == STATE_MULTIDOC_HACK);

975     }

976 

977     @Override

978     public boolean hasText() {

979         return (((1 << mCurrToken) & MASK_GET_TEXT) != 0);

980     }

981 

982     @Override

983     public boolean isAttributeSpecified(int index)

984     {

985         /* No need to check for ATTRIBUTE since we never return that...

986          */

987         if (mCurrToken != START_ELEMENT) {

988             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

989         }

990         return mAttrCollector.isSpecified(index);

991     }

992 

993     @Override

994     public boolean isCharacters()

995     {

996         /* 21-Dec-2005, TSa: Changed for 3.0 to work the same way as stax

997          *    ref impl.

998          */

999         //return (mCurrToken == CHARACTERS || mCurrToken == CDATA || mCurrToken == SPACE);

1000         /* 21-Apr-2009, TSa: As per [WSTX-201], should be consistent with

1001          *   what getEventType() returns (affects CDATA, SPACE, in

1002          *   coalescing mode or when explicitly asked to return CDATA

1003          *   as CHARACTERS)

1004          */

1005         return (CHARACTERS == getEventType());

1006     }

1007 

1008     @Override

1009     public boolean isEndElement() {

1010         return (mCurrToken == END_ELEMENT);

1011     }

1012 

1013     @Override

1014     public boolean isStartElement() {

1015         return (mCurrToken == START_ELEMENT);

1016     }

1017 

1018     /**

1019      *<p>

1020      * 05-Apr-2004, TSa: Could try to determine status when text is actually

1021      *   read. That'd prevent double reads... but would it slow down that

1022      *   one reading so that net effect would be negative?

1023      */

1024     @Override

1025     public boolean isWhiteSpace()

1026     {

1027         final int currToken = mCurrToken;

1028         if (currToken == CHARACTERS || currToken == CDATA) {

1029             if (mTokenState < mStTextThreshold) {

1030                 safeFinishToken();

1031             }

1032             if (mWsStatus == ALL_WS_UNKNOWN) {

1033                 mWsStatus = mTextBuffer.isAllWhitespace() ?

1034                     ALL_WS_YES : ALL_WS_NO;

1035             }

1036             return mWsStatus == ALL_WS_YES;

1037         }

1038         return (currToken == SPACE);

1039     }

1040 

1041     @Override

1042     public void require(int type, String nsUri, String localName)

1043         throws XMLStreamException

1044     {

1045         int curr = mCurrToken;

1046 

1047         /* There are some special cases; specifically, CDATA

1048          * is sometimes reported as CHARACTERS. Let's be lenient by

1049          * allowing both 'real' and reported types, for now.

1050          */

1051         if (curr != type) {

1052             if (curr == CDATA) {

1053                 if (mCfgCoalesceText || mCfgReportTextAsChars) {

1054                     curr = CHARACTERS;

1055                 }

1056             } else if (curr == SPACE) {

1057                 // Hmmh. Should we require it to be empty or something?

1058                 //curr = CHARACTERS;

1059                 // For now, let's not change the check

1060             }

1061         }

1062 

1063         if (type != curr) {

1064             throwParseError("Expected type "+tokenTypeDesc(type)

1065                             +", current type "

1066                             +tokenTypeDesc(curr));

1067         }

1068 

1069         if (localName != null) {

1070             if (curr != START_ELEMENT && curr != END_ELEMENT

1071                 && curr != ENTITY_REFERENCE) {

1072                 throwParseError("Expected non-null local name, but current token not a START_ELEMENT, END_ELEMENT or ENTITY_REFERENCE (was "+tokenTypeDesc(mCurrToken)+")");

1073             }

1074             String n = getLocalName();

1075             if (n != localName && !n.equals(localName)) {

1076                 throwParseError("Expected local name '"+localName+"'; current local name '"+n+"'.");

1077             }

1078         }

1079         if (nsUri != null) {

1080             if (curr != START_ELEMENT && curr != END_ELEMENT) {

1081                 throwParseError("Expected non-null NS URI, but current token not a START_ELEMENT or END_ELEMENT (was "+tokenTypeDesc(curr)+")");

1082             }

1083             String uri = mElementStack.getNsURI();

1084             // No namespace?

1085             if (nsUri.length() == 0) {

1086                 if (uri != null && uri.length() > 0) {

1087                     throwParseError("Expected empty namespace, instead have '"+uri+"'.");

1088                 }

1089             } else {

1090                 if ((nsUri != uri) && !nsUri.equals(uri)) {

1091                     throwParseError("Expected namespace '"+nsUri+"'; have '"

1092                                     +uri+"'.");

1093                 }

1094             }

1095         }

1096         // Ok, fine, all's good

1097     }

1098 

1099     /*

1100     ///////////////////////////////////////////////////////////////////////

1101     // XMLStreamReader, iterating

1102     ///////////////////////////////////////////////////////////////////////

1103      */

1104 

1105     @Override

1106     public final int next() throws XMLStreamException

1107     {

1108         /* 24-Sep-2006, TSa: We may have deferred an exception that occurred

1109          *   during parsing of the previous event. If so, now it needs to

1110          *   be thrown.

1111          */

1112         if (mPendingException != null) {

1113             XMLStreamException strEx = mPendingException;

1114             mPendingException = null;

1115             throw strEx;

1116         }

1117 

1118         /* Note: can not yet accurately record the location, since the

1119          * previous event might not yet be completely finished...

1120          */

1121         if (mParseState == STATE_TREE) {

1122             int type = nextFromTree();

1123             mCurrToken = type;

1124             if (mTokenState < mStTextThreshold) { // incomplete?

1125                 /* Can remain incomplete if lazy parsing is enabled,

1126                  * and this is not a validatable text segment; otherwise

1127                  * must finish

1128                  */

1129                 if (!mCfgLazyParsing ||

1130                     (mValidateText && (type == CHARACTERS || type == CDATA))) {

1131                     finishToken(false);

1132                 }

1133             }

1134 

1135             /* Special cases -- sometimes (when coalescing text, or

1136              * when specifically configured to do so), CDATA and SPACE are

1137              * to be reported as CHARACTERS, although we still will

1138              * internally keep track of the real type.

1139              */

1140             if (type == CDATA) {

1141                 if (mValidateText) {

1142                     mElementStack.validateText(mTextBuffer, false);

1143                 }

1144                 if (mCfgCoalesceText || mCfgReportTextAsChars) {

1145                     return CHARACTERS;

1146                 }

1147                 /*

1148                   } else if (type == SPACE) {

1149                   //if (mValidateText) { throw new IllegalStateException("Internal error: trying to validate SPACE event"); }

1150                   */

1151                 mCurrTextLength += mTextBuffer.size();

1152                 verifyLimit("Text size", mConfig.getMaxTextLength(), mCurrTextLength);

1153             } else if (type == CHARACTERS) {

1154                 if (mValidateText) {

1155                     /* We may be able to determine that there will be

1156                      * no more text coming for this element: but only

1157                      * seeing the end tag marker ("</") is certain

1158                      * (PIs and comments won't do, nor CDATA; start

1159                      * element possibly... but that indicates mixed

1160                      * content that's generally non-validatable)

1161                          */

1162                     if ((mInputPtr+1) < mInputEnd

1163                         && mInputBuffer[mInputPtr] == '<'

1164                         && mInputBuffer[mInputPtr+1] == '/') {

1165                         // yup, it's all there is

1166                         mElementStack.validateText(mTextBuffer, true);

1167                     } else {

1168                         mElementStack.validateText(mTextBuffer, false);

1169                     }

1170                 }

1171                 mCurrTextLength += mTextBuffer.size();

1172                 verifyLimit("Text size", mConfig.getMaxTextLength(), mCurrTextLength);

1173             } else if (type == START_ELEMENT || type == END_ELEMENT) {

1174                 this.mCurrTextLength = 0;

1175             }

1176             return type;

1177         }

1178 

1179         if (mParseState == STATE_PROLOG) {

1180             nextFromProlog(true);

1181         } else if (mParseState == STATE_EPILOG) {

1182             if (nextFromProlog(false)) {

1183                 // We'll return END_DOCUMENT, need to mark it 'as consumed'

1184                 mSecondaryToken = 0;

1185                 

1186             }

1187         } else if (mParseState == STATE_MULTIDOC_HACK) {

1188             mCurrToken = nextFromMultiDocState();

1189         } else { // == STATE_CLOSED

1190             if (mSecondaryToken == END_DOCUMENT) { // marker

1191                 mSecondaryToken = 0; // mark end doc as consumed

1192                 return END_DOCUMENT;

1193             }

1194             throw new java.util.NoSuchElementException();

1195         }

1196         return mCurrToken;

1197     }

1198 

1199     @Override

1200     public int nextTag() throws XMLStreamException

1201     {

1202         while (true) {

1203             int next = next();

1204 

1205             switch (next) {

1206             case SPACE:

1207             case COMMENT:

1208             case PROCESSING_INSTRUCTION:

1209                 continue;

1210             case CDATA:

1211             case CHARACTERS:

1212                 // inlined version of "isWhiteSpace()", so that exceptions can be passed as-is

1213                 // without suppression

1214                 if (mTokenState < mStTextThreshold) {

1215                     finishToken(false);

1216                 }

1217                 if (mWsStatus == ALL_WS_UNKNOWN) {

1218                     mWsStatus = mTextBuffer.isAllWhitespace() ? ALL_WS_YES : ALL_WS_NO;

1219                 }

1220                 if (mWsStatus == ALL_WS_YES) {

1221                     continue;

1222                 }

1223                 throwParseError("Received non-all-whitespace CHARACTERS or CDATA event in nextTag().");

1224                 break; // never gets here, but jikes complains without

1225             case START_ELEMENT:

1226             case END_ELEMENT:

1227                 return next;

1228             }

1229             throwParseError("Received event "+ErrorConsts.tokenTypeDesc(next)

1230                             +", instead of START_ELEMENT or END_ELEMENT.");

1231         }

1232     }

1233 

1234     /**

1235      *<p>

1236      * Note: as per StAX 1.0 specs, this method does NOT close the underlying

1237      * input reader. That is, unless the new StAX2 property

1238      * {@link org.codehaus.stax2.XMLInputFactory2#P_AUTO_CLOSE_INPUT} is

1239      * set to true.

1240      */

1241     @Override

1242     public void close() throws XMLStreamException

1243     {

1244         if (mParseState != STATE_CLOSED) {

1245             mParseState = STATE_CLOSED;

1246             /* Let's see if we should notify factory that symbol table

1247              * has new entries, and may want to reuse this symbol table

1248              * instead of current root.

1249              */

1250             if (mCurrToken != END_DOCUMENT) {

1251                 mCurrToken = mSecondaryToken = END_DOCUMENT;

1252                 if (mSymbols.isDirty()) {

1253                     mOwner.updateSymbolTable(mSymbols);

1254                 }

1255             }

1256             /* Hmmh. Actually, we need to close all the dependant input

1257              * sources, first, and then also call close() 

1258              * on the root input source object; it

1259              * will only do real close if that was enabled earlier.

1260              * The root input source also prevents multiple close() calls

1261              * for the underlying source, so we need not check that here.

1262              */

1263             closeAllInput(false);

1264             // And finally, can now recycle low-level (text) buffers

1265             mTextBuffer.recycle(true);

1266         }

1267     }

1268 

1269     /*

1270     ///////////////////////////////////////////////////////////////////////

1271     // XMLStreamReader2 (StAX2) implementation

1272     ///////////////////////////////////////////////////////////////////////

1273      */

1274 

1275     @Override

1276     @Deprecated

1277     public Object getFeature(String name)  {

1278         throw new IllegalArgumentException(MessageFormat.format(ErrorConsts.ERR_UNKNOWN_FEATURE, new Object[] { name })); 

1279     }

1280 

1281     @Override

1282     @Deprecated

1283     public void setFeature(String name, Object value) {

1284         throw new IllegalArgumentException(MessageFormat.format(ErrorConsts.ERR_UNKNOWN_FEATURE, new Object[] { name })); 

1285     }

1286 

1287     // NOTE: getProperty() defined in Stax 1.0 interface

1288 

1289     @Override

1290     public boolean isPropertySupported(String name) {

1291         // !!! TBI: not all these properties are really supported

1292         return mConfig.isPropertySupported(name);

1293     }

1294 

1295     /**

1296      * @param name Name of the property to set

1297      * @param value Value to set property to.

1298      *

1299      * @return True, if the specified property was <b>succesfully</b>

1300      *    set to specified value; false if its value was not changed

1301      */

1302     @Override

1303     public boolean setProperty(String name, Object value)

1304     {

1305         boolean ok = mConfig.setProperty(name, value);

1306         /* To make [WSTX-50] work fully dynamically (i.e. allow

1307          * setting BASE_URL after stream reader has been constructed)

1308          * need to force

1309          */

1310         if (ok && WstxInputProperties.P_BASE_URL.equals(name)) {

1311             // Easiest to just access from config: may come in as a String etc

1312             mInput.overrideSource(mConfig.getBaseURL());

1313         }

1314         return ok;

1315     }

1316 

1317     // // // StAX2, additional traversal methods

1318 

1319     @Override

1320     public void skipElement() throws XMLStreamException

1321     {

1322         if (mCurrToken != START_ELEMENT) {

1323             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

1324         }

1325         int nesting = 1; // need one more end elements than start elements

1326 

1327         while (true) {

1328             int type = next();

1329             if (type == START_ELEMENT) {

1330                 ++nesting;

1331             } else if (type == END_ELEMENT) {

1332                 if (--nesting == 0) {

1333                     break;

1334                 }

1335             }

1336         }

1337     }

1338 

1339     // // // StAX2, additional attribute access

1340 

1341     @Override

1342     public AttributeInfo getAttributeInfo() throws XMLStreamException

1343     {

1344         if (mCurrToken != START_ELEMENT) {

1345             throw new IllegalStateException(ErrorConsts.ERR_STATE_NOT_STELEM);

1346         }

1347         /* Although attribute collector knows about specific parsed

1348          * information, the element stack has DTD-derived information (if

1349          * any)... and knows how to call attribute collector when necessary.

1350          */

1351         return mElementStack;

1352     }

1353 

1354     // // // StAX2, Additional DTD access

1355 

1356     /**

1357      * Since this class implements {@link DTDInfo}, method can just

1358      * return <code>this</code>.

1359      */

1360     @Override

1361     public DTDInfo getDTDInfo() throws XMLStreamException

1362     {

1363         /* Let's not allow it to be accessed during other events -- that

1364          * way callers won't count on it being available afterwards.

1365          */

1366         if (mCurrToken != DTD) {

1367             return null;

1368         }

1369         if (mTokenState < TOKEN_FULL_SINGLE) { // need to fully read it in now

1370             finishToken(false);

1371         }

1372         return this;

1373     }

1374 

1375     // // // StAX2, Additional location information

1376 

1377     /**

1378      * Location information is always accessible, for this reader.

1379      */

1380     @Override

1381     public final LocationInfo getLocationInfo() {

1382         return this;

1383     }

1384 

1385     // // // StAX2, Pass-through text accessors

1386 

1387 

1388     /**

1389      * Method similar to {@link #getText()}, except

1390      * that it just uses provided Writer to write all textual content.

1391      * For further optimization, it may also be allowed to do true

1392      * pass-through, thus possibly avoiding one temporary copy of the

1393      * data.

1394      *<p>

1395      * TODO: try to optimize to allow completely streaming pass-through:

1396      * currently will still read all data in memory buffers before

1397      * outputting

1398      * 

1399      * @param w Writer to use for writing textual contents

1400      * @param preserveContents If true, reader has to preserve contents

1401      *   so that further calls to <code>getText</code> will return

1402      *   proper conntets. If false, reader is allowed to skip creation

1403      *   of such copies: this can improve performance, but it also means

1404      *   that further calls to <code>getText</code> is not guaranteed to

1405      *   return meaningful data.

1406      *

1407      * @return Number of characters written to the reader

1408      */

1409     @Override

1410     public int getText(Writer w, boolean preserveContents)

1411         throws IOException, XMLStreamException

1412     {

1413         final int currToken = mCurrToken;

1414         if (((1 << currToken) & MASK_GET_TEXT_WITH_WRITER) == 0) {

1415             throwNotTextual(currToken);

1416         }

1417         /* May need to be able to do fully streaming... but only for

1418          * text events that have not yet been fully read; for other

1419          * types there's less benefit, and for fully read ones, we

1420          * already have everything ready.

1421          */

1422         if (!preserveContents) {

1423             if (currToken == CHARACTERS) {

1424                 int count = mTextBuffer.rawContentsTo(w);

1425                 /* Let's also clear whatever was collected (as allowed by

1426                  * method contract) previously, to both save memory, and

1427                  * to ensure caller doesn't accidentally try to access it

1428                  * (and get otherwise 'random' results).

1429                  */

1430                 mTextBuffer.resetWithEmpty();

1431                 if (mTokenState < TOKEN_FULL_SINGLE) {

1432                     count += readAndWriteText(w);

1433                 }

1434                 if (mCfgCoalesceText &&

1435                     (mTokenState < TOKEN_FULL_COALESCED)) {

1436                     if (mCfgCoalesceText) {

1437                         count += readAndWriteCoalesced(w, false);

1438                     }

1439                 }

1440                 return count;

1441             } else if (currToken == CDATA) {

1442                 int count = mTextBuffer.rawContentsTo(w);

1443                 mTextBuffer.resetWithEmpty(); // same as with CHARACTERS

1444                 if (mTokenState < TOKEN_FULL_SINGLE) {

1445                     count += readAndWriteCData(w);

1446                 }

1447                 if (mCfgCoalesceText &&

1448                     (mTokenState < TOKEN_FULL_COALESCED)) {

1449                     if (mCfgCoalesceText) {

1450                         count += readAndWriteCoalesced(w, true);

1451                     }

1452                 }

1453                 return count;

1454             }

1455         }

1456         if (mTokenState < mStTextThreshold) {

1457             /* Otherwise, let's just finish the token; and due to guarantee

1458              * by streaming method, let's try ensure we get it all.

1459              */

1460             finishToken(false); // false -> shouldn't defer errors

1461         }

1462         if (currToken == ENTITY_REFERENCE) {

1463             return mCurrEntity.getReplacementText(w);

1464         }

1465         if (currToken == DTD) {

1466             char[] ch = getDTDInternalSubsetArray();

1467             if (ch != null) {

1468                 w.write(ch);

1469                 return ch.length;

1470             }

1471             return 0;

1472         }

1473         return mTextBuffer.rawContentsTo(w);

1474     }

1475 

1476     // // // StAX 2, Other accessors

1477 

1478     /**

1479      * @return Number of open elements in the stack; 0 when parser is in

1480      *  prolog/epilog, 1 inside root element and so on.

1481      */

1482     @Override

1483     public int getDepth() {

1484         /* Note: we can not necessarily use mCurrDepth, since it is

1485          * directly synchronized to the input (to catch unbalanced entity

1486          * expansion WRT element nesting), and not to actual token values

1487          * returned.

1488          */

1489         return mElementStack.getDepth();

1490     }

1491 

1492     /**

1493      * @return True, if cursor points to a start or end element that is

1494      *    constructed from 'empty' element (ends with {@code '/>'});

1495      *    false otherwise.

1496      */

1497     @Override

1498     public boolean isEmptyElement() throws XMLStreamException {

1499         return (mCurrToken == START_ELEMENT) ? mStEmptyElem : false;

1500     }

1501 

1502     @Override

1503     public NamespaceContext getNonTransientNamespaceContext() {

1504         // null -> no Location info, not needed with basic API

1505         return mElementStack.createNonTransientNsContext(null);

1506     }

1507 

1508     @Override

1509     public String getPrefixedName()

1510     {

1511         switch (mCurrToken) {

1512         case START_ELEMENT:

1513         case END_ELEMENT:

1514             {

1515                 String prefix = mElementStack.getPrefix();

1516                 String ln = mElementStack.getLocalName();

1517 

1518                 if (prefix == null) {

1519                     return ln;

1520                 }

1521                 StringBuilder sb = new StringBuilder(ln.length() + 1 + prefix.length());

1522                 sb.append(prefix);

1523                 sb.append(':');

1524                 sb.append(ln);

1525                 return sb.toString();

1526             }

1527         case ENTITY_REFERENCE:

1528             return getLocalName();

1529         case PROCESSING_INSTRUCTION:

1530             return getPITarget();

1531         case DTD:

1532             return getDTDRootName();

1533 

1534         }

1535         throw new IllegalStateException("Current state not START_ELEMENT, END_ELEMENT, ENTITY_REFERENCE, PROCESSING_INSTRUCTION or DTD");

1536     }

1537 

1538     @Override

1539     public void closeCompletely() throws XMLStreamException {

1540         closeAllInput(true);

1541     }

1542 

1543     /*

1544     ///////////////////////////////////////////////////////////////////////

1545     // DTDInfo implementation (StAX 2)

1546     ///////////////////////////////////////////////////////////////////////

1547      */

1548 

1549     /**

1550      *<p>

1551      * Note: DTD-handling sub-classes need to override this method.

1552      */

1553     @Override

1554     public Object getProcessedDTD() {

1555         return null;

1556     }

1557 

1558     @Override

1559     public String getDTDRootName() {

1560         if (mRootPrefix == null) {

1561             return mRootLName;

1562         }

1563         return mRootPrefix + ":" + mRootLName;

1564     }

1565 

1566     @Override

1567     public String getDTDPublicId() {

1568         return mDtdPublicId;

1569     }

1570 

1571     @Override

1572     public String getDTDSystemId() {

1573         return mDtdSystemId;

1574     }

1575 

1576     /**

1577      * @return Internal subset portion of the DOCTYPE declaration, if any;

1578      *   empty String if none

1579      */

1580     @Override

1581     public String getDTDInternalSubset() {

1582         if (mCurrToken != DTD) {

1583             return null;

1584         }

1585         return mTextBuffer.contentsAsString();

1586     }

1587 

1588     /**

1589      * Internal method used by implementation

1590      */

1591     private char[] getDTDInternalSubsetArray() {

1592         /* Note: no checks for current state, but only because it's

1593          * an internal method and callers are known to ensure it's ok

1594          * to call this

1595          */

1596         return mTextBuffer.contentsAsArray();

1597     }

1598 

1599     // // StAX2, v2.0

1600 

1601     /**

1602      * Sub-class will override this method

1603      */

1604     @Override

1605     public DTDValidationSchema getProcessedDTDSchema() {

1606         return null;

1607     }

1608 

1609     /*

1610     ///////////////////////////////////////////////////////////////////////

1611     // LocationInfo implementation (StAX 2)

1612     ///////////////////////////////////////////////////////////////////////

1613      */

1614 

1615     // // // First, the "raw" offset accessors:

1616 

1617     @Override

1618     public long getStartingByteOffset() {

1619         /* 15-Apr-2005, TSa: No way to reliably keep track of byte offsets,

1620          *   at least for variable-length encodings... so let's just

1621          *   return -1 for now

1622          */

1623         return -1L;

1624     }

1625 

1626     @Override

1627     public long getStartingCharOffset() {

1628         return mTokenInputTotal;

1629     }

1630 

1631     @Override

1632     public long getEndingByteOffset() throws XMLStreamException

1633     {

1634         /* 15-Apr-2005, TSa: No way to reliably keep track of byte offsets,

1635          *   at least for variable-length encodings... so let's just

1636          *   return -1 for now

1637          */

1638         return -1;

1639     }

1640 

1641     @Override

1642     public long getEndingCharOffset() throws XMLStreamException

1643     {

1644         // Need to get to the end of the token, if not there yet

1645         if (mTokenState < mStTextThreshold) {

1646             finishToken(false);

1647         }

1648         return mCurrInputProcessed + mInputPtr;

1649     }

1650 

1651     // // // and then the object-based access methods:

1652 

1653     @Override

1654     public final Location getLocation() {

1655         return getStartLocation();

1656     }

1657 

1658     // public XMLStreamLocation2 getStartLocation() // from base class

1659     // public XMLStreamLocation2 getCurrentLocation() // - "" -

1660 

1661     @Override

1662     public final XMLStreamLocation2 getEndLocation()

1663         throws XMLStreamException

1664     {

1665         // Need to get to the end of the token, if not there yet

1666         if (mTokenState < mStTextThreshold) {

1667             finishToken(false);

1668         }

1669         // And then we just need the current location!

1670         return getCurrentLocation();

1671     }

1672 

1673     /*

1674     ///////////////////////////////////////////////////////////////////////

1675     // Stax2 validation

1676     ///////////////////////////////////////////////////////////////////////

1677      */

1678 

1679     @Override

1680     public XMLValidator validateAgainst(XMLValidationSchema schema)

1681         throws XMLStreamException

1682     {

1683         // Not implemented by the basic reader:

1684         return null;

1685     }

1686 

1687     @Override

1688     public XMLValidator stopValidatingAgainst(XMLValidationSchema schema)

1689         throws XMLStreamException

1690     {

1691         // Not implemented by the basic reader:

1692         return null;

1693     }

1694 

1695     @Override

1696     public XMLValidator stopValidatingAgainst(XMLValidator validator)

1697         throws XMLStreamException

1698     {

1699         // Not implemented by the basic reader:

1700         return null;

1701     }

1702 

1703     @Override

1704     public ValidationProblemHandler setValidationProblemHandler(ValidationProblemHandler h)

1705     {

1706         // Not implemented by the basic reader:

1707         return null;

1708     }

1709 

1710     /*

1711     ///////////////////////////////////////////////////////////////////////

1712     // StreamReaderImpl implementation

1713     ///////////////////////////////////////////////////////////////////////

1714      */

1715 

1716     @Override

1717     public EntityDecl getCurrentEntityDecl() {

1718         return mCurrEntity;

1719     }

1720 

1721     /**

1722      * Method called by {@link com.ctc.wstx.evt.DefaultEventAllocator}

1723      * to get double-indirection necessary for constructing start element

1724      * events.

1725      *

1726      * @return Null, if stream does not point to start element; whatever

1727      *    callback returns otherwise.

1728      */

1729     @Override

1730     public Object withStartElement(ElemCallback cb, Location loc)

1731     {

1732         if (mCurrToken != START_ELEMENT) {

1733             return null;

1734         }

1735         return cb.withStartElement(loc, getName(), 

1736                 mElementStack.createNonTransientNsContext(loc),

1737                 mAttrCollector.buildAttrOb(),

1738                 mStEmptyElem);

1739     }

1740 

1741     @Override

1742     public boolean isNamespaceAware() {

1743         return mCfgNsEnabled;

1744     }

1745 

1746     /**

1747      * Method needed by classes (like stream writer implementations)

1748      * that want to have efficient direct access to element stack

1749      * implementation

1750      */

1751     @Override

1752     public InputElementStack getInputElementStack() {

1753         return mElementStack;

1754     }

1755 

1756     /**

1757      * Method needed by classes (like stream writer implementations)

1758      * that want to have efficient direct access to attribute collector

1759      * Object, for optimal attribute name and value access.

1760      */

1761     @Override

1762     public AttributeCollector getAttributeCollector() {

1763         return mAttrCollector;

1764     }

1765 

1766     /*

1767     ///////////////////////////////////////////////////////////////////////

1768     // Support for SAX XMLReader implementation

1769     ///////////////////////////////////////////////////////////////////////

1770      */

1771 

1772     public void fireSaxStartElement(ContentHandler h, Attributes attrs)

1773         throws SAXException

1774     {

1775         if (h != null) {

1776             // First; any ns declarations?

1777             int nsCount = mElementStack.getCurrentNsCount();

1778             for (int i = 0; i < nsCount; ++i) {

1779                 String prefix = mElementStack.getLocalNsPrefix(i);

1780                 String uri = mElementStack.getLocalNsURI(i);

1781                 h.startPrefixMapping((prefix == null) ? "" : prefix, uri);

1782             }

1783 

1784             // Then start-elem event itself:

1785             String uri = mElementStack.getNsURI();

1786             // Sax requires "" (not null) for ns uris...

1787             h.startElement((uri == null) ? "" : uri,

1788                            mElementStack.getLocalName(), getPrefixedName(), attrs);

1789         }

1790     }

1791 

1792     public void fireSaxEndElement(ContentHandler h)

1793         throws SAXException

1794     {

1795         if (h != null) {

1796             /* Order of events is reversed (wrt. start-element): first

1797              * the end tag event, then unbound prefixes

1798              */

1799             String uri = mElementStack.getNsURI();

1800             // Sax requires "" (not null) for ns uris...

1801             h.endElement((uri == null) ? "" : uri,

1802                          mElementStack.getLocalName(), getPrefixedName());

1803             // Any expiring ns declarations?

1804             int nsCount = mElementStack.getCurrentNsCount();

1805             for (int i = 0; i < nsCount; ++i) {

1806                 String prefix = mElementStack.getLocalNsPrefix(i);

1807                 //String nsUri = mElementStack.getLocalNsURI(i);

1808                 h.endPrefixMapping((prefix == null) ? "" : prefix);

1809             }

1810         }

1811     }

1812 

1813     public void fireSaxCharacterEvents(ContentHandler h)

1814         throws XMLStreamException, SAXException

1815     {

1816         if (h != null) {

1817             if (mPendingException != null) {

1818                 XMLStreamException sex = mPendingException;

1819                 mPendingException = null;

1820                 throw sex;

1821             }

1822             /* Let's not defer errors; SAXTest implies

1823              * it's expected errors are thrown right away

1824              */

1825             if (mTokenState < mStTextThreshold) {

1826                 finishToken(false);

1827             }

1828             mTextBuffer.fireSaxCharacterEvents(h);

1829         }

1830     }

1831 

1832     public void fireSaxSpaceEvents(ContentHandler h)

1833         throws XMLStreamException, SAXException

1834     {

1835         if (h != null) {

1836             if (mTokenState < mStTextThreshold) {

1837                 finishToken(false); // no error deferring

1838             }

1839             mTextBuffer.fireSaxSpaceEvents(h);

1840         }

1841     }

1842 

1843     public void fireSaxCommentEvent(LexicalHandler h)

1844         throws XMLStreamException, SAXException

1845     {

1846         if (h != null) {

1847             if (mTokenState < mStTextThreshold) {

1848                 finishToken(false); // no error deferring

1849             }

1850             mTextBuffer.fireSaxCommentEvent(h);

1851         }

1852     }

1853 

1854     public void fireSaxPIEvent(ContentHandler h)

1855         throws XMLStreamException, SAXException

1856     {

1857         if (h != null) {

1858             if (mTokenState < mStTextThreshold) {

1859                 finishToken(false); // no error deferring

1860             }

1861             h.processingInstruction(mCurrName, mTextBuffer.contentsAsString());

1862         }

1863     }

1864 

1865     /*

1866     ///////////////////////////////////////////////////////////////////////

1867     // Internal methods, config access

1868     ///////////////////////////////////////////////////////////////////////

1869      */

1870 

1871     protected final boolean hasConfigFlags(int flags) {

1872         return (mConfigFlags & flags) == flags;

1873     }

1874 

1875     /*

1876     ///////////////////////////////////////////////////////////////////////

1877     // Internal methods, parsing helper methods

1878     ///////////////////////////////////////////////////////////////////////

1879      */

1880 

1881     /**

1882      * @return Null, if keyword matches ok; String that contains erroneous

1883      *   keyword if not.

1884      */

1885     protected String checkKeyword(char c, String expected)

1886         throws XMLStreamException

1887     {

1888       int ptr = 0;

1889       int len = expected.length();

1890 

1891       while (expected.charAt(ptr) == c && ++ptr < len) {

1892           if (mInputPtr < mInputEnd) {

1893               c = mInputBuffer[mInputPtr++];

1894           } else {

1895               int ci = getNext();

1896               if (ci < 0) { // EOF

1897                   break;

1898               }

1899               c = (char) ci;

1900           }

1901       }

1902       

1903       if (ptr == len) {

1904           // Probable match... but let's make sure keyword is finished:

1905           int i = peekNext();

1906           if (i < 0 || (!isNameChar((char) i) && i != ':')) {

1907               return null;

1908           }

1909           // Nope, continues, need to find the rest:

1910       }

1911       

1912       StringBuilder sb = new StringBuilder(expected.length() + 16);

1913       sb.append(expected.substring(0, ptr));

1914       if (ptr < len) {

1915           sb.append(c);

1916       }

1917 

1918       while (true) {

1919           if (mInputPtr < mInputEnd) {

1920               c = mInputBuffer[mInputPtr++];

1921           } else {

1922               int ci = getNext();

1923               if (ci < 0) { // EOF

1924                   break;

1925               }

1926               c = (char) ci;

1927           }

1928           if (!isNameChar(c)) {

1929               // Let's push it back then

1930               --mInputPtr;

1931               break;

1932           }

1933           sb.append(c);

1934       }

1935 

1936       return sb.toString();

1937     }

1938 

1939     protected void checkCData() throws XMLStreamException

1940     {

1941         String wrong = checkKeyword(getNextCharFromCurrent(SUFFIX_IN_CDATA), "CDATA");

1942         if (wrong != null) {

1943             throwParseError("Unrecognized XML directive '"+wrong+"'; expected 'CDATA'.");

1944         }

1945         // Plus, need the bracket too:

1946         char c = getNextCharFromCurrent(SUFFIX_IN_CDATA);

1947         if (c != '[') {

1948             throwUnexpectedChar(c, "excepted '[' after '<![CDATA'");

1949         }

1950         // Cool, that's it!

1951     }

1952 

    /**
     * Method that will parse an attribute value enclosed in quotes, using
     * a {@link TextBuilder} instance. Will normalize white space inside
     * attribute value using default XML rules (change linefeeds to spaces
     * etc.; but won't use DTD information for further coalescing).
     *<p>
     * Processing done for each character:
     *<ul>
     * <li>Characters below space: linefeed, carriage return and tab are
     *   replaced with a space (a carriage return followed by a linefeed
     *   is consumed as a unit if linefeed normalization is enabled);
     *   anything else is reported via <code>throwInvalidSpace</code>
     *  </li>
     * <li>The quote character ends the value, but only if it is read from
     *   the same input source the value started in
     *  </li>
     * <li>Ampersand starts an entity reference, which either resolves to
     *   a character (added as one or two chars), or changes the input
     *   source, in which case nothing is added at that point
     *  </li>
     * <li>'&lt;' is reported as an unexpected character</li>
     *</ul>
     * Output is appended to the builder's current buffer; whenever the
     * local limit is reached, <code>_checkAttributeLimit</code> is called
     * to verify the maximum attribute size and to get more room.
     *
     * @param openingQuote Quote character (single or double quote) for
     *   this attribute value
     * @param tb TextBuilder into which attribute value will be added
     *
     * @throws XMLStreamException for invalid content, exceeded attribute
     *   size limit, or problems with reading input
     */
    private final void parseAttrValue(char openingQuote, TextBuilder tb)
        throws XMLStreamException
    {
        char[] outBuf = tb.getCharBuffer();
        int outPtr = tb.getCharSize();
        // important! Underlying buffer may be shared, does not necessarily start from 0
        final int startingOffset = outPtr;
        final int maxAttrSize = mConfig.getMaxAttributeSize();
        // Stop at whichever comes first: max attribute size, or end of the buffer
        int outLimit = Math.min(startingOffset+maxAttrSize, outBuf.length);
        // Input source at the start; needed to recognize the "real" closing quote
        final WstxInputSource currScope = mInput;

        while (true) {
            char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
                : getNextChar(SUFFIX_IN_ATTR_VALUE);
            // Let's do a quick for most attribute content chars:
            if (c <= '\'') {
                if (c < CHAR_SPACE) {
                    if (c == '\n') {
                        markLF();
                    } else if (c == '\r') {
                        // 04-Mar-2006, TSa: Linefeed normalization only done if enabled -
                        //   specifically, 2-char lfs from int. entities are not coalesced.
                        //   Now... whether to try to count them as one or not... easier not to;
                        //   esp. since we may not be able to distinguish char entity originated ones
                        //   from real ones.
                        if (mNormalizeLFs) {
                            c = getNextChar(SUFFIX_IN_ATTR_VALUE);
                            if (c != '\n') { // nope, not 2-char lf (Mac?)
                                --mInputPtr;
                            }
                        }
                        markLF();
                    } else if (c != '\t') {
                        throwInvalidSpace(c);
                    }
                    // Whatever it was, it'll be 'normal' space now.
                    c = CHAR_SPACE;
                } else if (c == openingQuote) {
                    // 06-Aug-2004, TSa: Can get these via entities; only "real" end quotes in same
                    //    scope count. Note, too, that since  this will only be done at root level,
                    //    there's no need  to check for "runaway" values; they'll hit EOF
                    if (mInput == currScope) {
                        break;
                    }
                } else if (c == '&') { // an entity of some sort...
                    int ch;
                    // Try the simple resolution first, if enough input is buffered
                    if (inputInBuffer() >= 3
                        && (ch = resolveSimpleEntity(true)) != 0) {
                        // Ok, fine, c is whatever it is
                        ;
                    } else { // full entity just changes buffer...
                        ch = fullyResolveEntity(false);
                        if (ch == 0) {
                            // need to skip output, thusly (expanded to new input source)
                            continue;
                        }
                    }
                    if (ch <= 0xFFFF) {
                        c = (char) ch;
                    } else {
                        // Does not fit in a single char: need a surrogate pair.
                        // First (high) surrogate is added here, second (low)
                        // one is added by the shared code below
                        ch -= 0x10000;
                        if (outPtr >= outLimit) {
                            outBuf = _checkAttributeLimit(tb, outBuf, outPtr, outPtr - startingOffset, maxAttrSize);
                            outLimit = Math.min(startingOffset+maxAttrSize, outBuf.length);
                        }
                        outBuf[outPtr++] = (char) ((ch >> 10)  + 0xD800);
                        c = (char) ((ch & 0x3FF)  + 0xDC00);
                    }
                }
            } else if (c == '<') {
                throwUnexpectedChar(c, SUFFIX_IN_ATTR_VALUE);
            }

            // Ok, let's just add char in, whatever it was
            if (outPtr >= outLimit) {
                outBuf = _checkAttributeLimit(tb, outBuf, outPtr, outPtr - startingOffset, maxAttrSize);
                outLimit = Math.min(startingOffset+maxAttrSize, outBuf.length);
            }
            outBuf[outPtr++] = c;
        }

        // Fine; let's tell TextBuild we're done:
        tb.setBufferSize(outPtr);
    }

2047 

2048     private final char[] _checkAttributeLimit(TextBuilder tb,

2049             char[] outBuf, int outPtr, int currAttrSize, int maxAttrSize)

2050         throws XMLStreamException

2051     {

2052         // Add +1 since we are at point where we are to append (at least) one more character

2053         verifyLimit("Maximum attribute size", maxAttrSize , currAttrSize+1);

2054         // just sanity check

2055         if (outPtr < outBuf.length) {

2056             ExceptionUtil.throwInternal("Expected either attr limit ("+maxAttrSize

2057                     +") >= currAttrSize ("+currAttrSize+") OR >= outBuf.length ("+outBuf.length+")");

2058         }

2059         return tb.bufferFull(1);

2060     }

2061 

2062     /*

2063     ///////////////////////////////////////////////////////////////////////

2064     // Internal methods, parsing prolog (before root) and epilog

2065     ///////////////////////////////////////////////////////////////////////

2066      */

2067     

    /**
     * Method called to find type of next token in prolog (or epilog);
     * either reading just enough information to know the type (lazy
     * parsing), or the full contents (non-lazy).
     *<p>
     * Steps taken: the previous token is skipped if it was not fully
     * read (otherwise just the token start location is updated); white
     * space is either returned as a SPACE event (if configured to be
     * reported) or skipped; and then the token that follows is
     * identified from the character(s) after '&lt;', with
     * <code>mCurrToken</code> set accordingly.
     *
     * @param isProlog True if reader is in prolog (before the root
     *   element), false if in epilog (after it). Determines which error
     *   messages are used, whether a start element is the root element
     *   or an extra root, and is passed to <code>handleEOF</code>.
     *
     * @return True if we hit EOI, false otherwise
     *
     * @throws XMLStreamException for unexpected content, or problems
     *   with reading input
     */
    private boolean nextFromProlog(boolean isProlog)
        throws XMLStreamException
    {
        int i;

        // First, do we need to finish currently open token?
        if (mTokenState < mStTextThreshold) {
            mTokenState = TOKEN_FULL_COALESCED;
            i = skipToken();
            // note: skipToken() updates the start location
        } else {
            // Need to update the start location...
            mTokenInputTotal = mCurrInputProcessed + mInputPtr;
            mTokenInputRow = mCurrInputRow;
            mTokenInputCol = mInputPtr - mCurrInputRowStart;
            i = getNext();
        }

        // Any white space to parse or skip?
        if (i <= CHAR_SPACE && i >= 0) {
            // Need to return as an event?
            if (hasConfigFlags(CFG_REPORT_PROLOG_WS)) {
                mCurrToken = SPACE;
                if (readSpacePrimary((char) i, true)) {
                    /* no need to worry about coalescing, since CDATA is not
                     * allowed at this level...
                     */
                    mTokenState = TOKEN_FULL_COALESCED;
                } else {
                    if (mCfgLazyParsing) {
                        /* Let's not even bother checking if it's
                         * "long enough"; shouldn't usually matter, but few
                         * apps care to get multiple adjacent SPACE events...
                         */
                        mTokenState = TOKEN_STARTED;
                    } else {
                        readSpaceSecondary(true);
                        mTokenState = TOKEN_FULL_COALESCED;
                    }
                }
                return false;
            }
            // If not, can skip it right away
            --mInputPtr; // to handle linefeeds gracefully
            i = getNextAfterWS();
            if (i >= 0) {
                // ... after which location has to be reset properly:
                /* 11-Apr-2005, TSa: But note that we need to "move back"
                 *   column and total offset values by one, to compensate
                 *   for the char that was read (row can not have changed,
                 *   since it's non-WS, and thus non-lf/cr char)
                 */
                mTokenInputTotal = mCurrInputProcessed + mInputPtr - 1;
                mTokenInputRow = mCurrInputRow;
                mTokenInputCol = mInputPtr - mCurrInputRowStart - 1;
            }
        }

        // Did we hit EOI?
        if (i < 0) {
            // note: handleEOF() sets the current token (and throws, if in prolog)
            handleEOF(isProlog);
            mParseState = STATE_CLOSED;
            return true;
        }

        // Now we better have a lt...
        if (i != '<') {
            throwUnexpectedChar(i, (isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG)
                                +"; expected '<'");
        }

        // And then it should be easy to figure out type:
        char c = getNextChar(isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG);

        if (c == '?') { // proc. inst
            mCurrToken = readPIPrimary();
        } else  if (c == '!') { // DOCTYPE or comment (or CDATA, but not legal here)
            // Need to figure out bit more first...
            nextFromPrologBang(isProlog);
        } else if (c == '/') { // end tag not allowed...
            if (isProlog) {
                throwParseError("Unexpected character combination '</' in prolog.");
            }
            throwParseError("Unexpected character combination '</' in epilog (extra close tag?).");
        } else if (c == ':' || isNameStartChar(c)) {
            // Root element, only allowed after prolog
            if (!isProlog) {
                /* This call will throw an exception if there's a problem;
                 * otherwise set up everything properly
                 */
                mCurrToken = handleExtraRoot(c); // will check input parsing mode...
                return false;
            }
            handleRootElem(c);
            mCurrToken = START_ELEMENT;
        } else {
            throwUnexpectedChar(c, (isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG)
                                +", after '<'.");
        }

        // Ok; final twist, maybe we do NOT want lazy parsing?
        if (!mCfgLazyParsing && mTokenState < mStTextThreshold) {
            finishToken(false);
        }

        return false;
    }

2182 

2183     protected void handleRootElem(char c)

2184         throws XMLStreamException

2185     {

2186         mParseState = STATE_TREE;

2187         initValidation();

2188         handleStartElem(c);

2189         // Does name match with DOCTYPE declaration (if any)?

2190         // 20-Jan-2006, TSa: Only check this is we are (DTD) validating...

2191         if (mRootLName != null) {

2192             if (hasConfigFlags(CFG_VALIDATE_AGAINST_DTD)) {

2193                 if (!mElementStack.matches(mRootPrefix, mRootLName)) {

2194                     String actual = (mRootPrefix == null) ? mRootLName

2195                         : (mRootPrefix + ":" + mRootLName);

2196                     reportValidationProblem(ErrorConsts.ERR_VLD_WRONG_ROOT, actual, mRootLName);

2197                 }

2198             }

2199         }

2200     }

2201 

2202     /**

2203      * Method called right before the document root element is handled.

2204      * The default implementation is empty; validating stream readers

2205      * should override the method and do whatever initialization is

2206      * necessary

2207      */

2208     protected void initValidation()

2209         throws XMLStreamException

2210     {

2211         ; // nothing to do here

2212     }

2213 

2214     protected int handleEOF(boolean isProlog)

2215         throws XMLStreamException

2216     {

2217         /* 19-Aug-2006, TSa: mSecondaryToken needs to be initialized to

2218          *   END_DOCUMENT so we'll know it hasn't been yet accessed.

2219          */

2220         mCurrToken = mSecondaryToken = END_DOCUMENT;

2221 

2222         /* Although buffers have most likely already been recycled,

2223          * let's call this again just in case. At this point we can

2224          * safely discard any contents

2225          */

2226         mTextBuffer.recycle(true); // true -> clean'n recycle

2227         // It's ok to get EOF from epilog but not from prolog

2228         if (isProlog) {

2229             throwUnexpectedEOF(SUFFIX_IN_PROLOG);

2230         }

2231         return mCurrToken;

2232     }

2233 

2234     /**

2235      * Method called if a root-level element is found after the main

2236      * root element was closed. This is legal in multi-doc parsing

2237      * mode (and in fragment mode), but not in the default single-doc

2238      * mode. 

2239      * @param c Character passed in (not currently used)

2240      *

2241      * @return Token to return

2242      */

2243     private int handleExtraRoot(char c)

2244         throws XMLStreamException

2245     {

2246         if (!mConfig.inputParsingModeDocuments()) {

2247             /* Has to be single-doc mode, since fragment mode

2248              * should never get here (since fragment mode never has epilog

2249              * or prolog modes)

2250              */

2251             throwParseError("Illegal to have multiple roots (start tag in epilog?).");

2252         }

2253         // Need to push back the char, since it is the first char of elem name

2254         --mInputPtr;

2255         return handleMultiDocStart(START_ELEMENT);

2256     }

2257 

2258     /**

2259      * Method called when an event was encountered that indicates document

2260      * boundary in multi-doc mode. Needs to trigger dummy

2261      * END_DOCUMENT/START_DOCUMENT event combination, followed by the

2262      * handling of the original event.

2263      *

2264      * @return Event type to return

2265      */

2266     protected int handleMultiDocStart(int nextEvent)

2267     {

2268         mParseState = STATE_MULTIDOC_HACK;

2269         mTokenState = TOKEN_FULL_COALESCED; // this is a virtual event after all...

2270         mSecondaryToken = nextEvent;

2271         return END_DOCUMENT;

2272     }

2273 

2274     /**

2275      * Method called to get the next event when we are "multi-doc hack" mode,

2276      * during which extra END_DOCUMENT/START_DOCUMENT events need to be

2277      * returned.

2278      */

2279     private int nextFromMultiDocState()

2280         throws XMLStreamException

2281     {

2282         if (mCurrToken == END_DOCUMENT) {

2283             /* Ok; this is the initial step; need to advance: need to parse

2284              * xml declaration if that was the cause, otherwise just clear

2285              * up values.

2286              */

2287             if (mSecondaryToken == START_DOCUMENT) {

2288                 handleMultiDocXmlDecl();

2289             } else { // Nah, DOCTYPE or start element... just need to clear decl info:

2290                 mDocXmlEncoding = null;

2291                 mDocXmlVersion = XmlConsts.XML_V_UNKNOWN;

2292                 mDocStandalone = DOC_STANDALONE_UNKNOWN;

2293             }

2294             return START_DOCUMENT;

2295         }

2296         if (mCurrToken == START_DOCUMENT) {

2297             mParseState = STATE_PROLOG; // yup, we are now officially in prolog again...

2298 

2299             // Had an xml decl (ie. "real" START_DOCUMENT event)

2300             if (mSecondaryToken == START_DOCUMENT) { // was a real xml decl

2301                 nextFromProlog(true);

2302                 return mCurrToken;

2303             }

2304             // Nah, start elem or DOCTYPE

2305             if (mSecondaryToken == START_ELEMENT) {

2306                 handleRootElem(getNextChar(SUFFIX_IN_ELEMENT));

2307                 return START_ELEMENT;

2308             }

2309             if (mSecondaryToken == DTD) {

2310                 mStDoctypeFound = true;

2311                 startDTD();

2312                 return DTD;

2313             }

2314         }

2315         throw new IllegalStateException("Internal error: unexpected state; current event "

2316                                         +tokenTypeDesc(mCurrToken)+", sec. state: "+tokenTypeDesc(mSecondaryToken));

2317     }

2318 

2319     protected void handleMultiDocXmlDecl()

2320         throws XMLStreamException

2321     {

2322         // Let's default these first

2323         mDocStandalone = DOC_STANDALONE_UNKNOWN;

2324         mDocXmlEncoding = null;

2325 

2326         char c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL);

2327         String wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_VERSION);

2328         if (wrong != null) {

2329             throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_VERSION);

2330         }

2331         c = skipEquals(XmlConsts.XML_DECL_KW_VERSION, SUFFIX_IN_XML_DECL);

2332         TextBuffer tb = mTextBuffer;

2333         tb.resetInitialized();

2334         parseQuoted(XmlConsts.XML_DECL_KW_VERSION, c, tb);

2335         

2336         if (tb.equalsString(XmlConsts.XML_V_10_STR)) {

2337             mDocXmlVersion = XmlConsts.XML_V_10;

2338             mXml11 = false;

2339         } else if (tb.equalsString(XmlConsts.XML_V_11_STR)) {

2340             mDocXmlVersion = XmlConsts.XML_V_11;

2341             mXml11 = true;

2342         } else {

2343             mDocXmlVersion = XmlConsts.XML_V_UNKNOWN;

2344             mXml11 = false;

2345             throwParseError("Unexpected xml version '"+tb.toString()+"'; expected '"+XmlConsts.XML_V_10_STR+"' or '"+XmlConsts.XML_V_11_STR+"'");

2346         }

2347         

2348         c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL);

2349         

2350         if (c != '?') { // '?' signals end...

2351             if (c == 'e') { // encoding

2352                 wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_ENCODING);

2353                 if (wrong != null) {

2354                     throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_ENCODING);

2355                 }

2356                 c = skipEquals(XmlConsts.XML_DECL_KW_ENCODING, SUFFIX_IN_XML_DECL);

2357                 tb.resetWithEmpty();

2358                 parseQuoted(XmlConsts.XML_DECL_KW_ENCODING, c, tb);

2359                 mDocXmlEncoding = tb.toString();

2360                 /* should we verify encoding at this point? let's not, for now;

2361                  * since it's for information only, first declaration from

2362                  * bootstrapper is used for the whole stream.

2363                  */

2364                 c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL);

2365             } else if (c != 's') {

2366                 throwUnexpectedChar(c, " in xml declaration; expected either 'encoding' or 'standalone' pseudo-attribute");

2367             }

2368             

2369             // Standalone?

2370             if (c == 's') {

2371                 wrong = checkKeyword(c, XmlConsts.XML_DECL_KW_STANDALONE);

2372                 if (wrong != null) {

2373                     throwParseError(ErrorConsts.ERR_UNEXP_KEYWORD, wrong, XmlConsts.XML_DECL_KW_STANDALONE);

2374                 }

2375                 c = skipEquals(XmlConsts.XML_DECL_KW_STANDALONE, SUFFIX_IN_XML_DECL);

2376                 tb.resetWithEmpty();

2377                 parseQuoted(XmlConsts.XML_DECL_KW_STANDALONE, c, tb);

2378                 if (tb.equalsString(XmlConsts.XML_SA_YES)) {

2379                     mDocStandalone = DOC_STANDALONE_YES;

2380                 } else if (tb.equalsString(XmlConsts.XML_SA_NO)) {

2381                     mDocStandalone = DOC_STANDALONE_NO;

2382                 } else {

2383                     throwParseError("Unexpected xml '"+XmlConsts.XML_DECL_KW_STANDALONE+"' pseudo-attribute value '"

2384                                     +tb.toString()+"'; expected '"+XmlConsts.XML_SA_YES+"' or '"+

2385                                     XmlConsts.XML_SA_NO+"'");

2386                }

2387                 c = getNextInCurrAfterWS(SUFFIX_IN_XML_DECL);

2388             }

2389         }

2390         

2391         if (c != '?') {

2392             throwUnexpectedChar(c, " in xml declaration; expected '?>' as the end marker");

2393         }

2394         c = getNextCharFromCurrent(SUFFIX_IN_XML_DECL);

2395         if (c != '>') {

2396             throwUnexpectedChar(c, " in xml declaration; expected '>' to close the declaration");

2397         }

2398     }

2399 

2400     /**

2401      * Method that checks that input following is of form

2402      * '[S]* '=' [S]*' (as per XML specs, production #25).

2403      * Will push back non-white space characters as necessary, in

2404      * case no equals char is encountered.

2405      */

2406     protected final char skipEquals(String name, String eofMsg)

2407         throws XMLStreamException

2408     {

2409         char c = getNextInCurrAfterWS(eofMsg);

2410         if (c != '=') {

2411             throwUnexpectedChar(c, " in xml declaration; expected '=' to follow pseudo-attribute '"+name+"'");

2412         }

2413         // trailing space?

2414         return getNextInCurrAfterWS(eofMsg);

2415     }

2416 

2417     /**

2418      * Method called to parse quoted xml declaration pseudo-attribute values.

2419      * Works similar to attribute value parsing, except no entities can be

2420      * included, and in general need not be as picky (since caller is to

2421      * verify contents). One exception is that we do check for linefeeds

2422      * and lt chars, since they generally would indicate problems and

2423      * are useful to catch early on (can happen if a quote is missed etc)

2424      *<p>

2425      * Note: since it'll be called at most 3 times per document, this method

2426      * is not optimized too much.

2427      */

2428     protected final void parseQuoted(String name, char quoteChar, TextBuffer tbuf)

2429         throws XMLStreamException

2430     {

2431         if (quoteChar != '"' && quoteChar != '\'') {

2432             throwUnexpectedChar(quoteChar, " in xml declaration; waited ' or \" to start a value for pseudo-attribute '"+name+"'");

2433         }

2434         char[] outBuf = tbuf.getCurrentSegment();

2435         int outPtr = 0;

2436 

2437         while (true) {

2438             char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

2439                 : getNextChar(SUFFIX_IN_XML_DECL);

2440             

2441             if (c == quoteChar) {

2442                 break;

2443             }

2444             if (c < CHAR_SPACE || c == '<') {

2445                 throwUnexpectedChar(c, SUFFIX_IN_XML_DECL);

2446             } else if (c == CHAR_NULL) {

2447                 throwNullChar();

2448             }

2449             if (outPtr >= outBuf.length) {

2450                 outBuf = tbuf.finishCurrentSegment();

2451                 outPtr = 0;

2452             }

2453             outBuf[outPtr++] = c;

2454         }

2455         tbuf.setCurrentLength(outPtr);

2456     }

2457 

2458     /**

2459      * Called after character sequence '&lt;!' has been found; expectation is

2460      * that it'll either be DOCTYPE declaration (if we are in prolog and

2461      * haven't yet seen one), or a comment. CDATA is not legal here;

2462      * it would start same way otherwise.

2463      */

2464     private void nextFromPrologBang(boolean isProlog)

2465         throws XMLStreamException

2466     {

2467         int i = getNext();

2468         if (i < 0) {

2469             throwUnexpectedEOF(SUFFIX_IN_PROLOG);

2470         }

2471         if (i == 'D') { // Doctype declaration?

2472             String keyw = checkKeyword('D', "DOCTYPE");

2473             if (keyw != null) {

2474                 throwParseError("Unrecognized XML directive '<!"+keyw+"' (misspelled DOCTYPE?).");

2475             }

2476             

2477             if (!isProlog) {

2478                 // Still possibly ok in multidoc mode...

2479                 if (mConfig.inputParsingModeDocuments()) {

2480                     if (!mStDoctypeFound) {

2481                         mCurrToken = handleMultiDocStart(DTD);

2482                         return;

2483                     }

2484                 } else {

2485                     throwParseError(ErrorConsts.ERR_DTD_IN_EPILOG);

2486                 }

2487             }

2488             if (mStDoctypeFound) {

2489                 throwParseError(ErrorConsts.ERR_DTD_DUP);

2490             }

2491             mStDoctypeFound = true;

2492             // Ok; let's read main input (all but internal subset)

2493             mCurrToken = DTD;

2494             startDTD();

2495             return;

2496         } else if (i == '-') { // comment

2497             char c = getNextChar(isProlog ? SUFFIX_IN_PROLOG : SUFFIX_IN_EPILOG);

2498             if (c != '-') {

2499                 throwUnexpectedChar(i, " (malformed comment?)");

2500             }

2501             // Likewise, let's delay actual parsing/skipping.

2502             mTokenState = TOKEN_STARTED;

2503             mCurrToken = COMMENT;

2504             return;

2505         } else if (i == '[') { // erroneous CDATA?

2506             i = peekNext();

2507             // Let's just add bit of heuristics, to get better error msg

2508             if (i == 'C') {

2509                 throwUnexpectedChar(i, ErrorConsts.ERR_CDATA_IN_EPILOG);

2510             }

2511         }

2512 

2513         throwUnexpectedChar(i, " after '<!' (malformed comment?)");

2514     }

2515 

2516     /**

2517      * Method called to parse through most of DOCTYPE declaration; excluding

2518      * optional internal subset.

2519      */

2520     private void startDTD()

2521         throws XMLStreamException

2522     {

2523         /* 21-Nov-2004, TSa: Let's make sure that the buffer gets cleared

2524          *   at this point. Need not start branching yet, however, since

2525          *   DTD event is often skipped.

2526          */

2527         mTextBuffer.resetInitialized();

2528 

2529         /* So, what we need is:<code>

2530          *  <!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>

2531          *</code>. And we have already read the DOCTYPE token.

2532          */

2533 

2534         char c = getNextInCurrAfterWS(SUFFIX_IN_DTD);

2535         if (mCfgNsEnabled) {

2536             String str = parseLocalName(c);

2537             c = getNextChar(SUFFIX_IN_DTD);

2538             if (c == ':') { // Ok, got namespace and local name

2539                 mRootPrefix = str;

2540                 mRootLName = parseLocalName(getNextChar(SUFFIX_EOF_EXP_NAME));

2541             } else if (c <= CHAR_SPACE || c == '[' || c == '>') {

2542                 // ok to get white space or '[', or closing '>'

2543                 --mInputPtr; // pushback

2544                 mRootPrefix = null;

2545                 mRootLName = str;

2546             } else {

2547                 throwUnexpectedChar(c, " in DOCTYPE declaration; expected '[' or white space.");

2548             }

2549         } else {

2550             mRootLName = parseFullName(c);

2551             mRootPrefix = null;

2552         }

2553 

2554         // Ok, fine, what next?

2555         c = getNextInCurrAfterWS(SUFFIX_IN_DTD);

2556         if (c != '[' && c != '>') {

2557             String keyw = null;

2558             

2559             if (c == 'P') {

2560                 keyw = checkKeyword(getNextChar(SUFFIX_IN_DTD), "UBLIC");

2561                 if (keyw != null) {

2562                     keyw = "P" + keyw;

2563                 } else {

2564                     if (!skipWS(getNextChar(SUFFIX_IN_DTD))) {

2565                         throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a space between PUBLIC keyword and public id");

2566                     }

2567                     c = getNextCharFromCurrent(SUFFIX_IN_DTD);

2568                     if (c != '"' && c != '\'') {

2569                         throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a public identifier.");

2570                     }

2571                     mDtdPublicId = parsePublicId(c, SUFFIX_IN_DTD);

2572                     if (mDtdPublicId.length() == 0) {

2573                         // According to XML specs, this isn't illegal?

2574                         // however, better report it as empty, not null.

2575                         //mDtdPublicId = null;

2576                     }

2577                     if (!skipWS(getNextChar(SUFFIX_IN_DTD))) {

2578                         throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a space between public and system identifiers");

2579                     }

2580                     c = getNextCharFromCurrent(SUFFIX_IN_DTD);

2581                     if (c != '"' && c != '\'') {

2582                         throwParseError(SUFFIX_IN_DTD+"; expected a system identifier.");

2583                     }

2584                     mDtdSystemId = parseSystemId(c, mNormalizeLFs, SUFFIX_IN_DTD);

2585                     if (mDtdSystemId.length() == 0) {

2586                         // According to XML specs, this isn't illegal?

2587                         // however, better report it as empty, not null.

2588                         //mDtdSystemId = null;

2589                     }

2590                 }

2591             } else if (c == 'S') {

2592                 mDtdPublicId = null;

2593                 keyw = checkKeyword(getNextChar(SUFFIX_IN_DTD), "YSTEM");

2594                 if (keyw != null) {

2595                     keyw = "S" + keyw;

2596                 } else {

2597                     c = getNextInCurrAfterWS(SUFFIX_IN_DTD);

2598                     if (c != '"' && c != '\'') {

2599                         throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected a system identifier.");

2600                     }

2601                     mDtdSystemId = parseSystemId(c, mNormalizeLFs, SUFFIX_IN_DTD);

2602                     if (mDtdSystemId.length() == 0) {

2603                         // According to XML specs, this isn't illegal?

2604                         mDtdSystemId = null;

2605                     }

2606                 }

2607             } else {

2608                 if (!isNameStartChar(c)) {

2609                     throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected keywords 'PUBLIC' or 'SYSTEM'.");

2610                 } else {

2611                     --mInputPtr;

2612                     keyw = checkKeyword(c, "SYSTEM"); // keyword passed in doesn't matter

2613                 }

2614             }

2615             

2616             if (keyw != null) { // error:

2617                 throwParseError("Unexpected keyword '"+keyw+"'; expected 'PUBLIC' or 'SYSTEM'");

2618             }

2619             

2620             // Ok, should be done with external DTD identifier:

2621             c = getNextInCurrAfterWS(SUFFIX_IN_DTD);

2622         }

2623         

2624         if (c == '[') { // internal subset

2625             ;

2626         } else {

2627             if (c != '>') {

2628                 throwUnexpectedChar(c, SUFFIX_IN_DTD+"; expected closing '>'.");

2629             }

2630         }

2631         

2632         /* Actually, let's just push whatever char it is, back; this way

2633          * we can lazily initialize text buffer with DOCTYPE declaration

2634          * if/as necessary, even if there's no internal subset.

2635          */

2636         --mInputPtr; // pushback

2637         mTokenState = TOKEN_STARTED;

2638     }

2639 

2640     /**

2641      * This method gets called to handle remainder of DOCTYPE declaration,

2642      * essentially the optional internal subset. This class implements the

2643      * basic "ignore it" functionality, but can optionally still store copy

2644      * of the contents to the read buffer.

2645      *<p>

2646      * NOTE: Since this default implementation will be overridden by

2647      * some sub-classes, make sure you do NOT change the method signature.

2648      *

2649      * @param copyContents If true, will copy contents of the internal

2650      *   subset of DOCTYPE declaration

2651      *   in the text buffer; if false, will just completely ignore the

2652      *   subset (if one found).

2653      */

2654     protected void finishDTD(boolean copyContents)

2655         throws XMLStreamException

2656     {

2657         /* We know there are no spaces, as this char was read and pushed

2658          * back earlier...

2659          */

2660         char c = getNextChar(SUFFIX_IN_DTD);

2661         if (c == '[') {

2662             // Do we need to get contents as text too?

2663             if (copyContents) {

2664                 ((BranchingReaderSource) mInput).startBranch(mTextBuffer, mInputPtr, mNormalizeLFs);

2665             }

2666 

2667             try {

2668                 MinimalDTDReader.skipInternalSubset(this, mInput, mConfig);

2669             } finally {

2670                 /* Let's close branching in any and every case (may allow

2671                  * graceful recovery in error cases in future

2672                  */

2673                 if (copyContents) {

2674                     /* Need to "push back" ']' got in the succesful case

2675                      * (that's -1 part below);

2676                      * in error case it'll just be whatever last char was.

2677                      */

2678                     ((BranchingReaderSource) mInput).endBranch(mInputPtr-1);

2679                 }

2680             }

2681 

2682             // And then we need closing '>'

2683             c = getNextCharAfterWS(SUFFIX_IN_DTD_INTERNAL);

2684         }

2685 

2686         if (c != '>') {

2687             throwUnexpectedChar(c, "; expected '>' to finish DOCTYPE declaration.");

2688         }

2689     }

2690 

2691     /*

2692     ///////////////////////////////////////////////////////////////////////

2693     // Internal methods, main parsing (inside root)

2694     ///////////////////////////////////////////////////////////////////////

2695      */

2696 

    /**
     * Method called to parse beginning of the next event within
     * document tree, and return its type.
     *<p>
     * Processing is done in the following order: first the current token
     * is completed (skipped if it was not fully read; otherwise handled
     * according to its type: 'virtual' end element of an empty element,
     * popping of a closed element, continuation of a partially returned
     * CDATA section); then end-of-input is checked; then leading entity
     * references are resolved; then markup following '&lt;' is dispatched
     * on; and if none of these applies, textual content is read.
     *
     * @return Type of the event that was started (or, for some event
     *   types, completely read)
     *
     * @throws XMLStreamException For well-formedness problems, unexpected
     *   end of input, and for content problems that get reported
     */
    private final int nextFromTree()
        throws XMLStreamException
    {
        // First character of the next event (negative at end of input)
        int i;

        // First, do we need to finish currently open token?
        if (mTokenState < mStTextThreshold) {
            // No need to update state... will get taken care of
            /* 03-Mar-2006, TSa: Let's add a sanity check here, temporarily,
             *   to ensure we never skip any textual content when it is
             *   to be validated
             */
            if (mVldContent == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT) {
                if (mCurrToken == CHARACTERS || mCurrToken == CDATA) { // should never happen
                    throwParseError("Internal error: skipping validatable text");
                }
            }
            i = skipToken();
            // note: skipToken() updates the start location
        } else {
            // Start/end elements are never unfinished (ie. are always
            // completely read in)
            if (mCurrToken == START_ELEMENT) {
                // Start tag may be an empty tag:
                if (mStEmptyElem) {
                    // and if so, we'll then get 'virtual' close tag:
                    mStEmptyElem = false;
                    // ... and location info is correct already
                    // 27-Feb-2009, TSa: but we do have to handle validation of the end tag now
                    int vld = mElementStack.validateEndElement();
                    mVldContent = vld;
                    mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT);
                    return END_ELEMENT;
                }
            } else if (mCurrToken == END_ELEMENT) {
                // Close tag removes current element from stack
                if (!mElementStack.pop()) { // false if root closed
                    // if so, we'll get to epilog, unless in fragment mode
                    if (!mConfig.inputParsingModeFragment()) {
                        return closeContentTree();
                    }
                    // in fragment mode, fine, we'll just continue
                }
            } else if (mCurrToken == CDATA && mTokenState <= TOKEN_PARTIAL_SINGLE) {
                /* Just returned a partial CDATA... that's ok, just need to
                 * know we won't get opening marker etc.
                 * The tricky part here is just to ensure there's at least
                 * one character; if not, need to just discard the empty
                 * 'event' (note that it is possible to have an initial
                 * empty CDATA event for truly empty CDATA block; but not
                 * partial ones!). Let's just read it like a new
                 * CData section first:
                 */
                // First, need to update the start location...
                mTokenInputTotal = mCurrInputProcessed + mInputPtr;
                mTokenInputRow = mCurrInputRow;
                mTokenInputCol = mInputPtr - mCurrInputRowStart;
                // Fast path: take char straight from the buffer if available
                char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]
                    : getNextChar(SUFFIX_IN_CDATA);
                if (readCDataPrimary(c)) { // got it all!
                    // note: can not be in coalescing mode at this point;
                    // as we can never have partial cdata without unfinished token
                    // ... still need to have gotten at least 1 char though:
                    if (mTextBuffer.size() > 0) {
                        return CDATA;
                    }
                    // otherwise need to continue and parse the next event
                } else {
                    // Hmmh. Have to verify we get at least one char from
                    // CData section; if so, we are good to go for now;
                    // if not, need to get that damn char first:
                    if (mTextBuffer.size() == 0
                            && readCDataSecondary(mCfgLazyParsing
                                    ? 1 : mShortestTextSegment)) {
                        // Ok, all of it read
                        if (mTextBuffer.size() > 0) {
                            // And had some contents
                            mTokenState = TOKEN_FULL_SINGLE;
                            return CDATA;
                        }
                        // if nothing read, we'll just fall back (see below)
                    } else { // good enough!
                        mTokenState = TOKEN_PARTIAL_SINGLE;
                        return CDATA;
                    }
                }

                /* If we get here, it was the end of the section, without
                 * any more text inside CDATA, so let's just continue
                 */
            }
            // Once again, need to update the start location info:
            mTokenInputTotal = mCurrInputProcessed + mInputPtr;
            mTokenInputRow = mCurrInputRow;
            mTokenInputCol = mInputPtr - mCurrInputRowStart;
            i = getNext();
        }

        if (i < 0) {
            // 07-Oct-2005, TSa: May be ok in fragment mode (not otherwise),
            //  but we can just check if element stack has anything, as that handles all cases
            if (!mElementStack.isEmpty()) {
                throwUnexpectedEOF();
            }
            return handleEOF(false);
        }

        /* 26-Aug-2004, TSa: We have to deal with entities, usually, if
         *   they are the next thing; even in non-expanding mode there
         *   are entities and then there are entities... :-)
         *   Let's start with char entities; they can be expanded right away.
         */
        while (i == '&') {
            mWsStatus = ALL_WS_UNKNOWN;

            /* 30-Aug-2004, TSa: In some contexts entities are not
             *    allowed in any way, shape or form:
             */
            if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) {
                /* May be char entity, general entity; whatever it is it's
                 * invalid!
                 */
                reportInvalidContent(ENTITY_REFERENCE);
            }

            /* Need to call different methods based on whether we can do
             * automatic entity expansion or not:
             */
            int ch = mCfgReplaceEntities ?
                fullyResolveEntity(true) : resolveCharOnlyEntity(true);

            // Non-zero return value is the character the reference resolved to
            if (ch != 0) {
                /* Char-entity... need to initialize text output buffer, then;
                 * independent of whether it'll be needed or not.
                 */
                /* 30-Aug-2004, TSa: In some contexts only white space is
                 *   accepted...
                 */
                if (mVldContent <= XMLValidator.CONTENT_ALLOW_WS) {
                    // As per xml specs, only straight white space is legal
                    if (ch > CHAR_SPACE) {
                        /* 21-Sep-2008, TSa: Used to also require a call to
                         *   'mElementStack.reallyValidating', if only ws
                         *   allowed, to cover the case where non-typing-dtd
                         *   was only used to discover SPACE type. But
                         *   now that we have CONTENT_ALLOW_WS_NONSTRICT,
                         *   shouldn't be needed.
                         */
                        //if (mVldContent < XMLValidator.CONTENT_ALLOW_WS || mElementStack.reallyValidating()) {
                        reportInvalidContent(CHARACTERS);
                    }
                }
                TextBuffer tb = mTextBuffer;
                tb.resetInitialized();
                if (ch <= 0xFFFF) {
                    tb.append((char) ch);
                } else {
                    // Beyond 16 bits: has to be split into a surrogate pair
                    // (high surrogate first, then low surrogate)
                    ch -= 0x10000;
                    tb.append((char) ((ch >> 10)  + 0xD800));
                    tb.append((char) ((ch & 0x3FF)  + 0xDC00));
                }
                mTokenState = TOKEN_STARTED;
                return CHARACTERS;
            }

            /* Nope; was a general entity... in auto-mode, it's now been
             * expanded; in non-auto, need to figure out entity itself.
             */
            if (!mCfgReplaceEntities|| mCfgTreatCharRefsAsEntities) {
                if (!mCfgTreatCharRefsAsEntities) {
                    final EntityDecl ed = resolveNonCharEntity();
                    // Note: ed may still be null at this point
                    mCurrEntity = ed;
                }
                // Note: ed may still be null at this point
                mTokenState = TOKEN_FULL_COALESCED;
                /*
                // let's not worry about non-parsed entities, since this is unexpanded mode
                // ... although it'd be an error either way? Should we report it?
                if (ed != null && !ed.isParsed()) {
                    throwParseError("Reference to unparsed entity '"+ed.getName()+"' from content not allowed.");
                }
                */
                return ENTITY_REFERENCE;
            }

            // Otherwise automatic expansion fine; just need the next char:
            i = getNextChar(SUFFIX_IN_DOC);
        }

        if (i == '<') { // Markup
            // And then it should be easy to figure out type:
            char c = getNextChar(SUFFIX_IN_ELEMENT);
            if (c == '?') { // proc. inst
                // 30-Aug-2004, TSa: Not legal for EMPTY elements
                if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) {
                    reportInvalidContent(PROCESSING_INSTRUCTION);
                }
                return readPIPrimary();
            }

            if (c == '!') { // CDATA or comment
                // Need to figure out bit more first...
                int type = nextFromTreeCommentOrCData();
                // 30-Aug-2004, TSa: Not legal for EMPTY elements
                if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) {
                    reportInvalidContent(type);
                }
                return type;
            }
            if (c == '/') { // always legal (if name matches etc)
                readEndElem();
                return END_ELEMENT;
            }

            if (c == ':' || isNameStartChar(c)) {
                /* Note: checking for EMPTY content type is done by the
                 * validator, no need to check here
                 */
                handleStartElem(c);
                return START_ELEMENT;
            }
            // Anything else is an error; '[' just gets a more specific message
            if (c == '[') {
                throwUnexpectedChar(c, " in content after '<' (malformed <![CDATA[]] directive?)");
            }
            throwUnexpectedChar(c, " in content after '<' (malformed start element?).");
        }

        /* Text... ok; better parse the 'easy' (consecutive) portions right
         * away, since that's practically free (still need to scan those
         * characters no matter what, even if skipping).
         */
        /* But first, do we expect to get ignorable white space (only happens
         * in validating mode)? If so, needs bit different handling:
         */
        if (mVldContent <= XMLValidator.CONTENT_ALLOW_WS_NONSTRICT) {
            if (mVldContent == XMLValidator.CONTENT_ALLOW_NONE) {
                if (mElementStack.reallyValidating()) {
                    reportInvalidContent(CHARACTERS);
                }
            }
            if (i <= CHAR_SPACE) {
                /* Note: need not worry about coalescing, since non-whitespace
                 * text is illegal (ie. can not have CDATA)
                 */
                mTokenState = (readSpacePrimary((char) i, false)) ?
                    TOKEN_FULL_COALESCED : TOKEN_STARTED;
                return SPACE;
            }
            // Problem if we are really validating; otherwise not
            if (mElementStack.reallyValidating()) {
                reportInvalidContent(CHARACTERS);
            }
            /* otherwise, we know it's supposed to contain just space (or
             * be empty), but as we are not validating it's not an error
             * for this not to be true. Type should be changed to
             * CHARACTERS tho.
             */
        }

        // Further, when coalescing, can not be sure if we REALLY got it all
        if (readTextPrimary((char) i)) { // reached following markup
            mTokenState = TOKEN_FULL_SINGLE;
        } else {
            // If not coalescing, this may be enough for current event
            if (!mCfgCoalesceText
                && mTextBuffer.size() >= mShortestTextSegment) {
                mTokenState = TOKEN_PARTIAL_SINGLE;
            } else {
                mTokenState = TOKEN_STARTED;
            }
        }
        return CHARACTERS;
    }

2975 

2976     /**

2977      * Method called when advancing stream past the end tag that closes

2978      * the root element of the open document.

2979      * Document can be either the singular one, in regular mode, or one of

2980      * possibly multiple, in multi-doc mode: this method is never called

2981      * in fragment mode. Method needs to update state properly and

2982      * parse following epilog event (if any).

2983      *

2984      * @return Event following end tag of the root elemennt, if any;

2985      *   END_DOCUMENT otherwis.e

2986      */

2987     private int closeContentTree()

2988         throws XMLStreamException

2989     {

2990         mParseState = STATE_EPILOG;

2991         // this call will update the location too...

2992         if (nextFromProlog(false)) {

2993             mSecondaryToken = 0;

2994         }

2995         /* 10-Apr-2006, TSa: Let's actually try to update

2996          *   SymbolTable here (after main xml tree); caller

2997          *   may not continue parsing after this.

2998          */

2999         if (mSymbols.isDirty()) {

3000             mOwner.updateSymbolTable(mSymbols);

3001         }

3002         // May be able to recycle, but not certain; and definitely can not just

3003         // clean contents (may contain space(s) read)

3004         mTextBuffer.recycle(false);

3005         return mCurrToken;

3006     }

3007 

3008     /**

3009      * Method that takes care of parsing of start elements; including

3010      * full parsing of namespace declarations and attributes, as well as

3011      * namespace resolution.

3012      */

3013     private final void handleStartElem(char c)

3014         throws XMLStreamException

3015     {

3016         mTokenState = TOKEN_FULL_COALESCED;

3017         boolean empty;

3018 

3019         if (mCfgNsEnabled) {

3020             String str = parseLocalName(c);

3021             c = (mInputPtr < mInputEnd) ?

3022                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME);

3023             if (c == ':') { // Ok, got namespace and local name

3024                 c = (mInputPtr < mInputEnd) ?

3025                     mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME);

3026                 mElementStack.push(str, parseLocalName(c));

3027                 c = (mInputPtr < mInputEnd) ?

3028                     mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3029             } else {

3030                 mElementStack.push(null, str);

3031                 // c is fine as

3032             }

3033             /* Enough about element name itself; let's then parse attributes

3034              * and namespace declarations. Split into another method for clarity,

3035              * and so that maybe JIT has easier time to optimize it separately.

3036              */

3037              /* 04-Jul-2005, TSa: But hold up: we can easily check for a fairly

3038               *   common case of no attributes showing up, and us getting the

3039               *   closing '>' right away. Let's do that, since it can save

3040               *   a call to a rather long method.

3041               */

3042             empty = (c == '>') ? false : handleNsAttrs(c);

3043         } else { // Namespace handling not enabled:

3044             mElementStack.push(null, parseFullName(c));

3045             c = (mInputPtr < mInputEnd) ?

3046                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3047             empty = (c == '>') ? false : handleNonNsAttrs(c);

3048         }

3049         if (!empty) {

3050             ++mCurrDepth; // needed to match nesting with entity expansion

3051         }

3052         mStEmptyElem = empty;

3053 

3054         /* 27-Feb-2009, TSa: [WSTX-191]: We used to validate virtual

3055          *   end element here for empty elements, but it really should

3056          *   occur later on when actually returning that end element.

3057          */

3058         int vld = mElementStack.resolveAndValidateElement();

3059         mVldContent = vld;

3060         mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT);

3061     }

3062 

3063     /**

3064      * @return True if this is an empty element; false if not

3065      */

3066     private final boolean handleNsAttrs(char c)

3067         throws XMLStreamException

3068     {

3069         AttributeCollector ac = mAttrCollector;

3070 

3071         while (true) {

3072             if (c <= CHAR_SPACE) {

3073                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3074             } else if (c != '/' && c != '>') {

3075                 throwUnexpectedChar(c, " excepted space, or '>' or \"/>\"");

3076             }

3077 

3078             if (c == '/') {

3079                 c = getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3080                 if (c != '>') {

3081                     throwUnexpectedChar(c, " expected '>'");

3082                 }

3083                 return true;

3084             } else if (c == '>') {

3085                 return false;

3086             } else if (c == '<') {

3087                 throwParseError("Unexpected '<' character in element (missing closing '>'?)");

3088             }

3089 

3090             String prefix, localName;

3091             String str = parseLocalName(c);

3092             c = (mInputPtr < mInputEnd) ?

3093                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME);

3094             if (c == ':') { // Ok, got namespace and local name

3095                 prefix = str;

3096                 c = (mInputPtr < mInputEnd) ?

3097                     mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_EOF_EXP_NAME);

3098                 localName = parseLocalName(c);

3099             } else {

3100                 --mInputPtr; // pushback

3101                 prefix = null;

3102                 localName = str;

3103             }

3104 

3105             c = (mInputPtr < mInputEnd) ?

3106                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3107             if (c <= CHAR_SPACE) {

3108                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3109             }

3110             if (c != '=') {

3111                 throwUnexpectedChar(c, " expected '='");

3112             }

3113             c = (mInputPtr < mInputEnd) ?

3114                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3115             if (c <= CHAR_SPACE) {

3116                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3117             }

3118 

3119             // And then a quote:

3120             if (c != '"' && c != '\'') {

3121                 throwUnexpectedChar(c, SUFFIX_IN_ELEMENT+" Expected a quote");

3122             }

3123 

3124             // And then the actual value

3125             int startLen = -1;

3126             TextBuilder tb;

3127 

3128             if (prefix == sPrefixXmlns) { // non-default namespace declaration

3129                 tb = ac.getNsBuilder(localName);

3130                 // returns null if it's a dupe:

3131                 if (null == tb) {

3132                     throwParseError("Duplicate declaration for namespace prefix '"+localName+"'.");

3133                 }

3134                 startLen = tb.getCharSize();

3135             } else if (localName == sPrefixXmlns && prefix == null) {

3136                 tb = ac.getDefaultNsBuilder();

3137                 // returns null if default ns was already declared

3138                 if (null == tb) {

3139                     throwParseError("Duplicate default namespace declaration.");

3140                 }

3141             } else {

3142                 tb = ac.getAttrBuilder(prefix, localName);

3143             }

3144             parseAttrValue(c, tb);

3145 

3146             /* 19-Jul-2004, TSa: Need to check that non-default namespace

3147              *     URI is NOT empty, as per XML namespace specs, #2,

3148              *    ("...In such declarations, the namespace name may not

3149              *      be empty.")

3150              */

3151             /* (note: startLen is only set to first char position for

3152              * non-default NS declarations, see above...)

3153              */

3154             /* 04-Feb-2005, TSa: Namespaces 1.1 does allow this, though,

3155              *   so for xml 1.1 documents we need to allow it

3156              */

3157             if (!mXml11) {

3158                 if (startLen >= 0 && tb.getCharSize() == startLen) { // is empty!

3159                     throwParseError(ErrorConsts.ERR_NS_EMPTY);

3160                 }

3161             }

3162 

3163             // and then we need to iterate some more

3164             c = (mInputPtr < mInputEnd) ?

3165                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3166         }

3167         // never gets here

3168     }

3169 

3170     /**

3171      * @return True if this is an empty element; false if not

3172      */

3173     private final boolean handleNonNsAttrs(char c)

3174         throws XMLStreamException

3175     {

3176         AttributeCollector ac = mAttrCollector;

3177 

3178         while (true) {

3179             if (c <= CHAR_SPACE) {

3180                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3181             } else if (c != '/' && c != '>') {

3182                 throwUnexpectedChar(c, " excepted space, or '>' or \"/>\"");

3183             }

3184             if (c == '/') {

3185                 c = getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3186                 if (c != '>') {

3187                     throwUnexpectedChar(c, " expected '>'");

3188                 }

3189                 return true;

3190             } else if (c == '>') {

3191                 return false;

3192             } else if (c == '<') {

3193                 throwParseError("Unexpected '<' character in element (missing closing '>'?)");

3194             }

3195 

3196             String name = parseFullName(c);

3197             TextBuilder tb = ac.getAttrBuilder(null, name);

3198             c = (mInputPtr < mInputEnd) ?

3199                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3200             if (c <= CHAR_SPACE) {

3201                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3202             }

3203             if (c != '=') {

3204                 throwUnexpectedChar(c, " expected '='");

3205             }

3206             c = (mInputPtr < mInputEnd) ?

3207                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3208             if (c <= CHAR_SPACE) {

3209                 c = getNextInCurrAfterWS(SUFFIX_IN_ELEMENT, c);

3210             }

3211 

3212             // And then a quote:

3213             if (c != '"' && c != '\'') {

3214                 throwUnexpectedChar(c, SUFFIX_IN_ELEMENT+" Expected a quote");

3215             }

3216 

3217             // And then the actual value

3218             parseAttrValue(c, tb);

3219             // and then we need to iterate some more

3220             c = (mInputPtr < mInputEnd) ?

3221                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_ELEMENT);

3222         }

3223         // never gets here

3224     }

3225 

3226     /**

3227      * Method called to completely read a close tag, and update element

3228      * stack appropriately (including checking that tag matches etc).

3229      */

3230     protected final void readEndElem()

3231         throws XMLStreamException

3232     {

3233         mTokenState = TOKEN_FULL_COALESCED; // will be read completely

3234 

3235         if (mElementStack.isEmpty()) {

3236             // Let's just offline this for clarity

3237             reportExtraEndElem();

3238             return; // never gets here

3239         }

3240 

3241         char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3242             : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3243         // Quick check first; missing name?

3244         if  (!isNameStartChar(c) && c != ':') {

3245             if (c <= CHAR_SPACE) { // space

3246                 throwUnexpectedChar(c, "; missing element name?");

3247             }

3248             throwUnexpectedChar(c, "; expected an element name.");

3249         }

3250 

3251         /* Ok, now; good thing is we know exactly what to compare

3252          * against...

3253          */

3254         String expPrefix = mElementStack.getPrefix();

3255         String expLocalName = mElementStack.getLocalName();

3256 

3257         // Prefix to match?

3258         if (expPrefix != null && expPrefix.length() > 0) {

3259             int len = expPrefix.length();

3260             int i = 0;

3261 

3262             while (true){

3263                 if (c != expPrefix.charAt(i)) {

3264                     reportWrongEndPrefix(expPrefix, expLocalName, i);

3265                     return; // never gets here

3266                 }

3267                 if (++i >= len) {

3268                     break;

3269                 }

3270                 c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3271                     : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3272             }

3273             // And then we should get a colon

3274             c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3275                 : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3276             if (c != ':') {

3277                 reportWrongEndPrefix(expPrefix, expLocalName, i);

3278                 return;

3279             }

3280             c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3281                 : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3282         }

3283 

3284         // Ok, then, does the local name match?

3285         int len = expLocalName.length();

3286         int i = 0;

3287         

3288         while (true){

3289             if (c != expLocalName.charAt(i)) {

3290                 // Not a match...

3291                 reportWrongEndElem(expPrefix, expLocalName, i);

3292                 return; // never gets here

3293             }

3294             if (++i >= len) {

3295                 break;

3296             }

3297             c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3298                 : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3299         }

3300 

3301         // Let's see if end element still continues, however?

3302         c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3303             : getNextCharFromCurrent(SUFFIX_IN_CLOSE_ELEMENT);

3304         if (c <= CHAR_SPACE) {

3305             c = getNextInCurrAfterWS(SUFFIX_IN_CLOSE_ELEMENT, c);

3306         } else if (c == '>') {

3307             ;

3308         } else if (c == ':' || isNameChar(c)) {

3309             reportWrongEndElem(expPrefix, expLocalName, len);

3310         }

3311 

3312         // Ok, fine, match ok; now we just need the closing gt char.

3313         if (c != '>') {

3314             throwUnexpectedChar(c, SUFFIX_IN_CLOSE_ELEMENT+" Expected '>'.");

3315         }

3316 

3317         // Finally, let's let validator detect if things are ok

3318         int vld = mElementStack.validateEndElement();

3319         mVldContent = vld;

3320         mValidateText = (vld == XMLValidator.CONTENT_ALLOW_VALIDATABLE_TEXT);

3321 

3322         // Plus verify WFC that start and end tags came from same entity

3323         /* 13-Feb-2006, TSa: Are we about to close an element that

3324          *    started within a parent element?

3325          *    That's a GE/element nesting WFC violation...

3326          */

3327         if (mCurrDepth == mInputTopDepth) {

3328             handleGreedyEntityProblem(mInput);

3329         }

3330         --mCurrDepth;

3331     }

3332 

3333     private void reportExtraEndElem()

3334         throws XMLStreamException

3335     {

3336         String name = parseFNameForError();

3337         throwParseError("Unbalanced close tag </"+name+">; no open start tag.");

3338     }

3339 

3340     private void reportWrongEndPrefix(String prefix, String localName, int done)

3341         throws XMLStreamException

3342     {

3343         --mInputPtr; // pushback

3344         String fullName = prefix + ":" + localName;

3345         String rest = parseFNameForError();

3346         String actName = fullName.substring(0, done) + rest;

3347         throwParseError("Unexpected close tag </"+actName+">; expected </"

3348                         +fullName+">.");

3349     }

3350 

3351     private void reportWrongEndElem(String prefix, String localName, int done)

3352         throws XMLStreamException

3353     {

3354         --mInputPtr; // pushback

3355         String fullName;

3356         if (prefix != null && prefix.length() > 0) {

3357             fullName = prefix + ":" + localName;

3358             done += 1 + prefix.length();

3359         } else {

3360             fullName = localName;

3361         }

3362         String rest = parseFNameForError();

3363         String actName = fullName.substring(0, done) + rest;

3364         throwParseError("Unexpected close tag </"+actName+">; expected </"

3365                         +fullName+">.");

3366     }

3367 

3368     /**

3369      *<p>

3370      * Note: According to StAX 1.0, coalesced text events are always to be

3371      * returned as CHARACTERS, never as CDATA. And since at this point we

3372      * don't really know if there's anything to coalesce (but there may

3373      * be), let's convert CDATA if necessary.

3374      */

3375     private int nextFromTreeCommentOrCData()

3376         throws XMLStreamException

3377     {

3378         char c = getNextCharFromCurrent(SUFFIX_IN_DOC);

3379         if (c == '[') {

3380             checkCData();

3381             /* Good enough; it is a CDATA section... but let's just also

3382              * parse the easy ("free") stuff:

3383              */

3384             c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

3385                 : getNextCharFromCurrent(SUFFIX_IN_CDATA);

3386             readCDataPrimary(c); // sets token state appropriately...

3387             return CDATA;

3388         }

3389         if (c == '-' && getNextCharFromCurrent(SUFFIX_IN_DOC) == '-') {

3390             mTokenState = TOKEN_STARTED;

3391             return COMMENT;

3392         }

3393         throwParseError("Unrecognized XML directive; expected CDATA or comment ('<![CDATA[' or '<!--').");

3394         return 0; // never gets here, but compilers don't know it...

3395     }

3396 

3397     /*

3398     ///////////////////////////////////////////////////////////////////////

3399     // Internal methods, skipping

3400     ///////////////////////////////////////////////////////////////////////

3401      */

3402 

3403     /**

3404      * Method called to skip last part of current token, when full token

3405      * has not been parsed. Generally happens when caller is not interested

3406      * in current token and just calls next() to iterate to next token.

3407      *<p>

3408      * Note: this method is to accurately update the location information

3409      * to reflect where the next event will start (or, in case of EOF, where

3410      * EOF was encountered, ie. where event would start, if there was one).

3411      *

3412      * @return Next character after node has been skipped, or -1 if EOF

3413      *    follows

3414      */

3415     private int skipToken()

3416         throws XMLStreamException

3417     {

3418         int result;

3419 

3420         main_switch:

3421         switch (mCurrToken) {

3422         case CDATA:

3423             {

3424                 /* 30-Aug-2004, TSa: Need to be careful here: we may

3425                  *    actually have finished with CDATA, but are just

3426                  *    coalescing... if so, need to skip first part of

3427                  *    skipping

3428                  */

3429                 if (mTokenState <= TOKEN_PARTIAL_SINGLE) {

3430                     // Skipping CDATA is easy; just need to spot closing ]]&gt;

3431                     skipCommentOrCData(SUFFIX_IN_CDATA, ']', false);

3432                 }

3433                 result = getNext();

3434                 // ... except if coalescing, may need to skip more:

3435                 if (mCfgCoalesceText) {

3436                     result = skipCoalescedText(result);

3437                 }

3438             }

3439             break;

3440                 

3441         case COMMENT:

3442             skipCommentOrCData(SUFFIX_IN_COMMENT, '-', true);

3443             result = 0;

3444             break;

3445 

3446         case CHARACTERS:

3447             {

3448                 result = skipTokenText(getNext());

3449                 // ... except if coalescing, need to skip more:

3450                 if (mCfgCoalesceText) {

3451                     result = skipCoalescedText(result);

3452                 }

3453             }

3454             break;

3455 

3456         case DTD:

3457             finishDTD(false);

3458             result = 0;

3459             break;

3460 

3461         case PROCESSING_INSTRUCTION:

3462             while (true) {

3463                 char c = (mInputPtr < mInputEnd)

3464                     ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);

3465                 if (c == '?') {

3466                     do {

3467                         c = (mInputPtr < mInputEnd)

3468                             ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);

3469                     } while (c == '?');

3470                     if (c == '>') {

3471                         result = 0;

3472                         break main_switch;

3473                     }

3474                 }

3475                 if (c < CHAR_SPACE) {

3476                     if (c == '\n' || c == '\r') {

3477                         skipCRLF(c);

3478                     } else if (c != '\t') {

3479                         throwInvalidSpace(c);

3480                     }

3481                 }

3482             }

3483             // never gets in here

3484 

3485         case SPACE:

3486 

3487             while (true) {

3488                 // Fairly easy to skip through white space...

3489                 while (mInputPtr < mInputEnd) {

3490                     char c = mInputBuffer[mInputPtr++];

3491                     if (c > CHAR_SPACE) { // non-EOF non-WS?

3492                         result = c;

3493                         break main_switch;

3494                     }

3495                     if (c == '\n' || c == '\r') {

3496                         skipCRLF(c);

3497                     } else if (c != CHAR_SPACE && c != '\t') {

3498                         throwInvalidSpace(c);

3499                     }

3500                 }

3501                 if (!loadMore()) {

3502                     result = -1;

3503                     break main_switch;

3504                 }

3505             }

3506             // never gets in here

3507 

3508         case ENTITY_REFERENCE: // these should never end up in here...

3509         case ENTITY_DECLARATION:

3510         case NOTATION_DECLARATION:

3511         case START_DOCUMENT:

3512         case END_DOCUMENT:

3513             // As are start/end document

3514             throw new IllegalStateException("skipToken() called when current token is "+tokenTypeDesc(mCurrToken));

3515 

3516         case ATTRIBUTE:

3517         case NAMESPACE:

3518             // These two are never returned by this class

3519         case START_ELEMENT:

3520         case END_ELEMENT:

3521             /* Never called for elements tokens; start token handled

3522              * differently, end token always completely read in the first place

3523              */

3524 

3525         default:

3526             throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken));

3527 

3528         }

3529 

3530         /* Ok; now we have 3 possibilities; result is:

3531          *

3532          * + 0 -> could reliably read the prev event, now need the

3533          *   following char/EOF

3534          * + -1 -> hit EOF; can return it

3535          * + something else -> this is the next char, return it.

3536          *

3537          * In first 2 cases, next event start offset is the current location;

3538          * in third case, it needs to be backtracked by one char

3539          */

3540         if (result < 1) {

3541             mTokenInputRow = mCurrInputRow;

3542             mTokenInputTotal = mCurrInputProcessed + mInputPtr;

3543             mTokenInputCol = mInputPtr - mCurrInputRowStart;

3544             return (result < 0) ? result : getNext();

3545         }

3546 

3547         // Ok, need to offset location, and return whatever we got:

3548         mTokenInputRow = mCurrInputRow;

3549         mTokenInputTotal = mCurrInputProcessed + mInputPtr - 1;

3550         mTokenInputCol = mInputPtr - mCurrInputRowStart - 1;

3551         return result;

3552     }

3553 

3554     private void skipCommentOrCData(String errorMsg, char endChar, boolean preventDoubles)

3555         throws XMLStreamException

3556     {

3557         /* Let's skip all chars except for double-ending chars in

3558          * question (hyphen for comments, right brack for cdata)

3559          */

3560         int count = 0;

3561         while (true) {

3562             char c;

3563             while (true) {

3564                 if (mInputPtr >= mInputEnd) {

3565                     verifyLimit("Text size", mConfig.getMaxTextLength(), count);

3566                     c = getNextCharFromCurrent(errorMsg);

3567                 } else {

3568                     c =  mInputBuffer[mInputPtr++];

3569                 }

3570                 if (c < CHAR_SPACE) {

3571                     if (c == '\n' || c == '\r') {

3572                         skipCRLF(c);

3573                     } else if (c != '\t') {

3574                         throwInvalidSpace(c);

3575                     }

3576                 } else if (c == endChar) {

3577                     break;

3578                 }

3579                 ++count;

3580             }

3581 

3582             // Now, we may be getting end mark; first need second marker char:.

3583             c = getNextChar(errorMsg);

3584             if (c == endChar) { // Probably?

3585                 // Now; we should be getting a '>', most likely.

3586                 c = getNextChar(errorMsg);

3587                 if (c == '>') {

3588                     break;

3589                 }

3590                 if (preventDoubles) { // if not, it may be a problem...

3591                     throwParseError("String '--' not allowed in comment (missing '>'?)");

3592                 }

3593                 // Otherwise, let's loop to see if there is end

3594                 while (c == endChar) {

3595                     c = (mInputPtr < mInputEnd)

3596                         ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(errorMsg);

3597                 }

3598                 if (c == '>') {

3599                     break;

3600                 }

3601             }

3602 

3603             // No match, did we get a linefeed?

3604             if (c < CHAR_SPACE) {

3605                 if (c == '\n' || c == '\r') {

3606                     skipCRLF(c);

3607                 } else if (c != '\t') {

3608                     throwInvalidSpace(c);

3609                 }

3610             }

3611             // Let's continue from beginning, then

3612         }

3613     }

3614 

3615     /**

3616      * Method called to skip past all following text and CDATA segments,

3617      * until encountering something else (including a general entity,

3618      * which may in turn expand to text).

3619      *

3620      * @return Character following all the skipped text and CDATA segments,

3621      *   if any; or -1 to denote EOF

3622      */

3623     private int skipCoalescedText(int i)

3624         throws XMLStreamException

3625     {

3626         while (true) {

3627             // Ok, plain text or markup?

3628             if (i == '<') { // markup, maybe CDATA?

3629                 // Need to distinguish "<![" from other tags/directives

3630                 if (!ensureInput(3)) {

3631                     /* Most likely an error condition, but let's leave

3632                      * it up for other parts of code to complain.

3633                      */

3634                     return i;

3635                 }

3636                 if (mInputBuffer[mInputPtr] != '!'

3637                     || mInputBuffer[mInputPtr+1] != '[') {

3638                     // Nah, some other tag or directive

3639                     return i;

3640                 }

3641                 // Let's skip beginning parts, then:

3642                 mInputPtr += 2;

3643                 // And verify we get proper CDATA directive

3644                 checkCData();

3645                 skipCommentOrCData(SUFFIX_IN_CDATA, ']', false);

3646                 i = getNext();

3647             } else if (i < 0) { // eof

3648                 return i;

3649             } else { // nah, normal text, gotta skip

3650                 i = skipTokenText(i);

3651                 /* Did we hit an unexpandable entity? If so, need to

3652                  * return ampersand to the caller...

3653                  * (and same for EOF too)

3654                  */

3655                 if (i == '&' || i < 0) {

3656                     return i;

3657                 }

3658             }

3659         }

3660     }

3661 

3662     private int skipTokenText(int i)

3663         throws XMLStreamException

3664     {

3665         /* Fairly easy; except for potential to have entities

3666          * expand to some crap?

3667          */

3668         int count = 0;

3669         

3670         main_loop:

3671         while (true) {

3672             if (i == '<') {

3673                 return i;

3674             }

3675             if (i == '&') {

3676                 // Can entities be resolved automatically?

3677                 if (mCfgReplaceEntities) {

3678                     // Let's first try quick resolution:

3679                     if ((mInputEnd - mInputPtr) >= 3

3680                         && resolveSimpleEntity(true) != 0) {

3681                         ;

3682                     } else {

3683                         i = fullyResolveEntity(true);

3684                         /* Either way, it's just fine; we don't care about

3685                          * returned single-char value.

3686                          */

3687                     }

3688                 } else {

3689                     /* Can only skip character entities; others need to

3690                      * be returned separately.

3691                      */

3692                     if (resolveCharOnlyEntity(true) == 0) {

3693                         /* Now points to the char after ampersand, and we need

3694                          * to return the ampersand itself

3695                          */

3696                         return i;

3697                     }

3698                 }

3699             } else if (i < CHAR_SPACE) {

3700                 if (i == '\r' || i == '\n') {

3701                     skipCRLF((char) i);

3702                 } else if (i < 0) { // EOF

3703                     return i;

3704                 } else if (i != '\t') {

3705                     throwInvalidSpace(i);

3706                 }

3707 

3708             }

3709             ++count;

3710             verifyLimit("Text size", mConfig.getMaxTextLength(), count);

3711 

3712             // Hmmh... let's do quick looping here:

3713             while (mInputPtr < mInputEnd) {

3714                 char c = mInputBuffer[mInputPtr++];

3715                 if (c < CHAR_FIRST_PURE_TEXT) { // need to check it

3716                     i = c;

3717                     continue main_loop;

3718                 }

3719             }

3720 

3721             i = getNext();

3722         }

3723         // never gets here...

3724     }

3725 

3726     /*

3727     ///////////////////////////////////////////////////////////////////////

3728     // Internal methods, parsing

3729     ///////////////////////////////////////////////////////////////////////

3730      */

3731 

3732     protected void ensureFinishToken() throws XMLStreamException

3733     {

3734         if (mTokenState < mStTextThreshold) {

3735             finishToken(false);

3736         }

3737     }

3738 

3739     protected void safeEnsureFinishToken()

3740     {

3741         if (mTokenState < mStTextThreshold) {

3742             safeFinishToken();

3743         }

3744     }

3745 

3746     protected void safeFinishToken()

3747     {

3748         try {

3749             /* 24-Sep-2006, TSa: Let's try to reduce number of unchecked

3750              *   (wrapped) exceptions we throw, and defer some. For now,

3751              *   this is only for CHARACTERS (since it's always legal to

3752              *   split CHARACTERS segment); could be expanded in future.

3753              */

3754             boolean deferErrors = (mCurrToken == CHARACTERS);

3755             finishToken(deferErrors);

3756         } catch (XMLStreamException strex) {

3757             throwLazyError(strex);

3758         }

3759     }

3760 

3761     /**

3762      * Method called to read in contents of the token completely, if not

3763      * yet read. Generally called when caller needs to access anything

3764      * other than basic token type (except for elements), text contents

3765      * or such.

3766      *

3767      * @param deferErrors Flag to enable storing an exception to a 

3768      *   variable, instead of immediately throwing it. If true, will

3769      *   just store the exception; if false, will not store, just throw.

3770      */

3771     protected void finishToken(boolean deferErrors)

3772         throws XMLStreamException

3773     {

3774         switch (mCurrToken) {

3775         case CDATA:

3776             if (mCfgCoalesceText) {

3777                 readCoalescedText(mCurrToken, deferErrors);

3778             } else {

3779                 if (readCDataSecondary(Integer.MAX_VALUE)) {

3780                     mTokenState = TOKEN_FULL_SINGLE;

3781                 } else { // can this ever happen?

3782                     mTokenState = TOKEN_PARTIAL_SINGLE;

3783                 }

3784             }

3785             return;

3786 

3787         case CHARACTERS:

3788             if (mCfgCoalesceText) {

3789                 /* 21-Sep-2005, TSa: It is often possible to optimize

3790                  *   here: if we get '<' NOT followed by '!', it can not

3791                  *   be CDATA, and thus we are done.

3792                  */

3793                 if (mTokenState == TOKEN_FULL_SINGLE

3794                     && (mInputPtr + 1) < mInputEnd

3795                     && mInputBuffer[mInputPtr+1] != '!') {

3796                     mTokenState = TOKEN_FULL_COALESCED;

3797                     return;

3798                 }

3799                 readCoalescedText(mCurrToken, deferErrors);

3800             } else {

3801                 if (readTextSecondary(mShortestTextSegment, deferErrors)) {

3802                     mTokenState = TOKEN_FULL_SINGLE;

3803                 } else {

3804                     mTokenState = TOKEN_PARTIAL_SINGLE;

3805                 }

3806             }

3807             return;

3808 

3809         case SPACE:

3810             {

3811                 /* Only need to ensure there's no non-whitespace text

3812                  * when parsing 'real' ignorable white space (in validating

3813                  * mode, but that's implicit here)

3814                  */

3815                 boolean prolog = (mParseState != STATE_TREE);

3816                 readSpaceSecondary(prolog);

3817                 mTokenState = TOKEN_FULL_COALESCED;

3818             }

3819             return;

3820 

3821         case COMMENT:

3822             readComment();

3823             mTokenState = TOKEN_FULL_COALESCED;

3824             return;

3825 

3826         case DTD:

3827 

3828             /* 05-Jan-2006, TSa: Although we shouldn't have to use finally

3829              *   here, it's probably better to do that for robustness

3830              *   (specifically, in case of a parsing problem, we don't want

3831              *   to remain in 'DTD partially read' case -- it's better

3832              *   to get in panic mode and skip the rest)

3833              */

3834             try {

3835                 finishDTD(true);

3836             } finally {

3837                 mTokenState = TOKEN_FULL_COALESCED;

3838             }

3839             return;

3840 

3841         case PROCESSING_INSTRUCTION:

3842             readPI();

3843             mTokenState = TOKEN_FULL_COALESCED;

3844             return;

3845 

3846         case START_ELEMENT:

3847         case END_ELEMENT: // these 2 should never end up in here...

3848         case ENTITY_REFERENCE:

3849         case ENTITY_DECLARATION:

3850         case NOTATION_DECLARATION:

3851         case START_DOCUMENT:

3852         case END_DOCUMENT:

3853             throw new IllegalStateException("finishToken() called when current token is "+tokenTypeDesc(mCurrToken));

3854 

3855         case ATTRIBUTE:

3856         case NAMESPACE:

3857             // These two are never returned by this class

3858         default:

3859         }

3860 

3861         throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken));

3862     }

3863 

3864     private void readComment()

3865         throws XMLStreamException

3866     {

3867         char[] inputBuf = mInputBuffer;

3868         int inputLen = mInputEnd;

3869         int ptr = mInputPtr;

3870         int start = ptr;

3871 

3872         // Let's first see if we can just share input buffer:

3873         while (ptr < inputLen) {

3874             char c = inputBuf[ptr++];

3875             if (c > '-') {

3876                 continue;

3877             }

3878 

3879             if (c < CHAR_SPACE) {

3880                 if (c == '\n') {

3881                     markLF(ptr);

3882                 } else if (c == '\r') {

3883                     if (!mNormalizeLFs && ptr < inputLen) {

3884                         if (inputBuf[ptr] == '\n') {

3885                             ++ptr;

3886                         }

3887                         markLF(ptr);

3888                     } else {

3889                         --ptr; // pushback

3890                         break;

3891                     }

3892                 } else if (c != '\t') {

3893                     throwInvalidSpace(c);

3894                 }

3895             } else if (c == '-') {

3896                 // Ok; need to get '->', can not get '--'

3897                 

3898                 if ((ptr + 1) >= inputLen) {

3899                     // Can't check next 2, let's push '-' back, for rest of

3900                     // code to take care of

3901                     --ptr;

3902                     break;

3903                 }

3904                 

3905                 if (inputBuf[ptr] != '-') {

3906                     // Can't skip, might be LF/CR

3907                     continue;

3908                 }

3909                 // Ok; either get '>' or error:

3910                 c = inputBuf[ptr+1];

3911                 if (c != '>') {

3912                     throwParseError("String '--' not allowed in comment (missing '>'?)");

3913                 }

3914                 mTextBuffer.resetWithShared(inputBuf, start, ptr-start-1);

3915                 mInputPtr = ptr + 2;

3916                 return;

3917             }

3918         }

3919         mInputPtr = ptr;

3920         mTextBuffer.resetWithCopy(inputBuf, start, ptr-start);

3921         readComment2(mTextBuffer);

3922     }

3923 

3924     private void readComment2(TextBuffer tb)

3925         throws XMLStreamException

3926     {

3927         /* Output pointers; calls will also ensure that the buffer is

3928          * not shared, AND has room for at least one more char

3929          */

3930         char[] outBuf = tb.getCurrentSegment();

3931         int outPtr = tb.getCurrentSegmentSize();

3932         int outLen = outBuf.length;

3933 

3934         while (true) {

3935             char c = (mInputPtr < mInputEnd) ?

3936                 mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_COMMENT);

3937 

3938             if (c < CHAR_SPACE) {

3939                 if (c == '\n') {

3940                     markLF();

3941                 } else if (c == '\r') {

3942                     if (skipCRLF(c)) { // got 2 char LF

3943                         if (!mNormalizeLFs) {

3944                             if (outPtr >= outLen) { // need more room?

3945                                 outBuf = mTextBuffer.finishCurrentSegment();

3946                                 outLen = outBuf.length;

3947                                 outPtr = 0;

3948                             }

3949                             outBuf[outPtr++] = c;

3950                         }

3951                         // And let's let default output the 2nd char

3952                         c = '\n';

3953                     } else if (mNormalizeLFs) { // just \r, but need to convert

3954                         c = '\n'; // For Mac text

3955                     }

3956                 } else if (c != '\t') {

3957                     throwInvalidSpace(c);

3958                 }

3959             } else if (c == '-') { // Ok; need to get '->', can not get '--'

3960                 c = getNextCharFromCurrent(SUFFIX_IN_COMMENT);

3961                 if (c == '-') { // Ok, has to be end marker then:

3962                     // Either get '>' or error:

3963                     c = getNextCharFromCurrent(SUFFIX_IN_COMMENT);

3964                     if (c != '>') {

3965                         throwParseError(ErrorConsts.ERR_HYPHENS_IN_COMMENT);

3966                     }

3967                     break;

3968                 }

3969 

3970                 /* Not the end marker; let's just output the first hyphen,

3971                  * push the second char back , and let main

3972                  * code handle it.

3973                  */

3974                 c = '-';

3975                 --mInputPtr;

3976             }

3977 

3978             // Need more room?

3979             if (outPtr >= outLen) {

3980                 outBuf = mTextBuffer.finishCurrentSegment();

3981                 outLen = outBuf.length;

3982                 outPtr = 0;

3983                 verifyLimit("Text size", mConfig.getMaxTextLength(), mTextBuffer.size());

3984             }

3985             // Ok, let's add char to output:

3986             outBuf[outPtr++] = c;

3987         }

3988 

3989         // Ok, all done, then!

3990         mTextBuffer.setCurrentLength(outPtr);

3991     }

3992 

3993     /**

3994      * Method that reads the primary part of a PI, ie. target, and also

3995      * skips white space between target and data (if any data)

3996      *

3997      * @return Usually <code>PROCESSING_INSTRUCTION</code>; but may be

3998      *    different in multi-doc mode, if we actually hit a secondary

3999      *    xml declaration.

4000      */

4001     private final int readPIPrimary()

4002         throws XMLStreamException

4003     {

4004         // Ok, first we need the name:

4005         String target = parseFullName();

4006         mCurrName = target;

4007 

4008         if (target.length() == 0) {

4009             throwParseError(ErrorConsts.ERR_WF_PI_MISSING_TARGET);

4010         }

4011 

4012         // As per XML specs, #17, case-insensitive 'xml' is illegal:

4013         if (target.equalsIgnoreCase("xml")) {

4014             // 07-Oct-2005, TSa: Still legal in multi-doc mode...

4015             if (!mConfig.inputParsingModeDocuments()) {

4016                 throwParseError(ErrorConsts.ERR_WF_PI_XML_TARGET, target, null);

4017             }

4018             // Ok, let's just verify we get space then

4019             char c = getNextCharFromCurrent(SUFFIX_IN_XML_DECL);

4020             if (!isSpaceChar(c)) {

4021                 throwUnexpectedChar(c, "excepted a space in xml declaration after 'xml'");

4022             }

4023             return handleMultiDocStart(START_DOCUMENT);

4024         }

4025 

4026         // And then either white space before data, or end marker:

4027         char c = (mInputPtr < mInputEnd) ?

4028             mInputBuffer[mInputPtr++] : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);

4029         if (isSpaceChar(c)) { // Ok, space to skip

4030             mTokenState = TOKEN_STARTED;

4031             // Need to skip the WS...

4032             skipWS(c);

4033         } else { // Nope; apparently finishes right away...

4034             mTokenState = TOKEN_FULL_COALESCED;

4035             mTextBuffer.resetWithEmpty();

4036             // or does it?

4037             if (c != '?' || getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR) != '>') {

4038                 throwUnexpectedChar(c, ErrorConsts.ERR_WF_PI_XML_MISSING_SPACE);

4039             }

4040         }

4041 

4042         return PROCESSING_INSTRUCTION;

4043     }

4044 

    /**
     * Method that parses a processing instruction's data portion; at this
     * point target has been parsed.
     *<p>
     * First tries to find the closing "?&gt;" within the current input
     * buffer, in which case the text buffer simply shares the input
     * buffer (no copy). If the scan has to stop early (end of buffer, or
     * a '\r' that can not be handled in place), contents scanned so far
     * are copied and <code>readPI2</code> reads the rest.
     *
     * @throws XMLStreamException for invalid control characters, or for
     *   problems reported by the secondary read
     */
    private void readPI()
        throws XMLStreamException
    {
        int ptr = mInputPtr;
        int start = ptr;
        char[] inputBuf = mInputBuffer;
        int inputLen = mInputEnd;

        outer_loop:
        while (ptr < inputLen) {
            char c = inputBuf[ptr++];
            if (c < CHAR_SPACE) {
                if (c == '\n') {
                    markLF(ptr);
                } else if (c == '\r') {
                    /* '\r' can only stay in the shared buffer if no
                     * normalization is needed, and the following char
                     * can be peeked at (to detect a 2-char linefeed)
                     */
                    if (ptr < inputLen && !mNormalizeLFs) {
                        if (inputBuf[ptr] == '\n') {
                            ++ptr;
                        }
                        markLF(ptr);
                    } else {
                        --ptr; // pushback
                        break;
                    }
                } else if (c != '\t') {
                    throwInvalidSpace(c);
                }
            } else if (c == '?') {
                // K; now just need '>' after zero or more '?'s
                while (true) {
                    if (ptr >= inputLen) {
                        /* end of buffer; need to push back at least one of
                         * question marks (not all, since just one is needed
                         * to close the PI)
                         */
                        --ptr;
                        break outer_loop;
                    }
                    c = inputBuf[ptr++];
                    if (c == '>') {
                        mInputPtr = ptr;
                        // Need to discard trailing '?>'
                        mTextBuffer.resetWithShared(inputBuf, start, ptr-start-2);
                        return;
                    }
                    if (c != '?') {
                        // Not end, can continue, but need to push back last char, in case it's LF/CR
                        --ptr;
                        break;
                    }
                }
            }
        }
        
        // Slow path: hand over what was scanned so far, and read the rest
        mInputPtr = ptr;
        // No point in trying to share... let's just append
        mTextBuffer.resetWithCopy(inputBuf, start, ptr-start);
        readPI2(mTextBuffer);
    }

4108 

4109     private void readPI2(TextBuffer tb)

4110         throws XMLStreamException

4111     {

4112         char[] inputBuf = mInputBuffer;

4113         int inputLen = mInputEnd;

4114         int inputPtr = mInputPtr;

4115 

4116         /* Output pointers; calls will also ensure that the buffer is

4117          * not shared, AND has room for one more char

4118          */

4119         char[] outBuf = tb.getCurrentSegment();

4120         int outPtr = tb.getCurrentSegmentSize();

4121 

4122         main_loop:

4123         while (true) {

4124             // Let's first ensure we have some data in there...

4125             if (inputPtr >= inputLen) {

4126                 loadMoreFromCurrent(SUFFIX_IN_PROC_INSTR);

4127                 inputBuf = mInputBuffer;

4128                 inputPtr = mInputPtr;

4129                 inputLen = mInputEnd;

4130             }

4131 

4132             // And then do chunks

4133             char c = inputBuf[inputPtr++];

4134             if (c < CHAR_SPACE) {

4135                 if (c == '\n') {

4136                     markLF(inputPtr);

4137                 } else if (c == '\r') {

4138                     mInputPtr = inputPtr;

4139                     if (skipCRLF(c)) { // got 2 char LF

4140                         if (!mNormalizeLFs) {

4141                             // Special handling, to output 2 chars at a time:

4142                             if (outPtr >= outBuf.length) { // need more room?

4143                                 outBuf = mTextBuffer.finishCurrentSegment();

4144                                 outPtr = 0;

4145                             }

4146                             outBuf[outPtr++] = c;

4147                         }

4148                         // And let's let default output the 2nd char, either way

4149                         c = '\n';

4150                     } else if (mNormalizeLFs) { // just \r, but need to convert

4151                         c = '\n'; // For Mac text

4152                     }

4153                     /* Since skipCRLF() needs to peek(), buffer may have

4154                      * changed, even if there was no CR+LF.

4155                      */

4156                     inputPtr = mInputPtr;

4157                     inputBuf = mInputBuffer;

4158                     inputLen = mInputEnd;

4159                 } else if (c != '\t') {

4160                     throwInvalidSpace(c);

4161                 }

4162             } else if (c == '?') { // Ok, just need '>' after zero or more '?'s

4163                 mInputPtr = inputPtr; // to allow us to call getNextChar

4164 

4165                 qmLoop:

4166                 while (true) {

4167                     c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

4168                         : getNextCharFromCurrent(SUFFIX_IN_PROC_INSTR);

4169                     if (c == '>') { // got it!

4170                         break main_loop;

4171                     } else if (c == '?') {

4172                         if (outPtr >= outBuf.length) { // need more room?

4173                             outBuf = tb.finishCurrentSegment();

4174                             outPtr = 0;

4175                         }

4176                         outBuf[outPtr++] = c;

4177                     } else {

4178                         /* Hmmh. Wasn't end mark after all. Thus, need to

4179                          * fall back to normal processing, with one more

4180                          * question mark (first one matched that wasn't

4181                          * yet output),

4182                          * reset variables, and go back to main loop.

4183                          */

4184                         inputPtr = --mInputPtr; // push back last char

4185                         inputBuf = mInputBuffer;

4186                         inputLen = mInputEnd;

4187                         c = '?';

4188                         break qmLoop;

4189                     }

4190                 }

4191             } // if (c == '?)

4192 

4193             // Need more room?

4194             if (outPtr >= outBuf.length) {

4195                 outBuf = tb.finishCurrentSegment();

4196                 outPtr = 0;

4197             }

4198             // Ok, let's add char to output:

4199             outBuf[outPtr++] = c;

4200 

4201         } // while (true)

4202 

4203         tb.setCurrentLength(outPtr);

4204     }

4205 

4206     /**

4207      * Method called to read the content of both current CDATA/CHARACTERS

4208      * events, and all following consequtive events into the text buffer.

4209      * At this point the current type is known, prefix (for CDATA) skipped,

4210      * and initial consequtive contents (if any) read in.

4211      *

4212      * @param deferErrors Flag to enable storing an exception to a 

4213      *   variable, instead of immediately throwing it. If true, will

4214      *   just store the exception; if false, will not store, just throw.

4215      */

4216     protected void readCoalescedText(int currType, boolean deferErrors)

4217         throws XMLStreamException

4218     {

4219         boolean wasCData;

4220 

4221         // Ok; so we may need to combine adjacent text/CDATA chunks.

4222         if (currType == CHARACTERS || currType == SPACE) {

4223             readTextSecondary(Integer.MAX_VALUE, deferErrors);

4224             wasCData = false;

4225         } else if (currType == CDATA) {

4226             /* We may have actually really finished it, but just left

4227              * the 'unfinished' flag due to need to coalesce...

4228              */

4229             if (mTokenState <= TOKEN_PARTIAL_SINGLE) {

4230                 readCDataSecondary(Integer.MAX_VALUE);

4231             }

4232             wasCData = true;

4233         } else {

4234             throw new IllegalStateException("Internal error: unexpected token "+tokenTypeDesc(mCurrToken)+"; expected CHARACTERS, CDATA or SPACE.");

4235         }

4236 

4237         // But how about additional text?

4238         while (!deferErrors || (mPendingException == null)) {

4239             if (mInputPtr >= mInputEnd) {

4240                 mTextBuffer.ensureNotShared();

4241                 if (!loadMore()) {

4242                     // ??? Likely an error but let's just break

4243                     break;

4244                 }

4245             }

4246             // Let's peek, ie. not advance it yet

4247             char c = mInputBuffer[mInputPtr];

4248             if (c == '<') { // CDATA, maybe?

4249                 // Need to distinguish "<![" from other tags/directives

4250                 // 26-Feb-2014, tatu: Wrt [WSTX-294], need to unshare buffer

4251                 //   unless whole leading CDATA marker fits in buffer

4252                 if ((mInputEnd - mInputPtr) < 9) { // 3 for "<![" and 6 more for "CDATA["

4253                     mTextBuffer.ensureNotShared();

4254                     if (!ensureInput(3)) {

4255                         break;

4256                     }

4257                 }

4258                 if (mInputBuffer[mInputPtr+1] != '!'

4259                     || mInputBuffer[mInputPtr+2] != '[') {

4260                     // Nah, some other tag or directive

4261                     break;

4262                 }

4263                 // Let's skip beginning parts, then:

4264                 mInputPtr += 3;

4265                 // And verify we get proper CDATA directive

4266                 checkCData();

4267                 /* No need to call the primary data; it's only useful if

4268                  * there's a chance for sharing buffers... so let's call

4269                  * the secondary loop straight on.

4270                  */

4271                 readCDataSecondary(Integer.MAX_VALUE);

4272                 wasCData = true;

4273             } else { // text

4274                 /* Did we hit an 'unexpandable' entity? If so, need to

4275                  * just bail out.

4276                  */

4277                 if (c == '&' && !wasCData) {

4278                     break;

4279                 }

4280                 // Likewise, can't share buffers, let's call secondary loop:

4281                 readTextSecondary(Integer.MAX_VALUE, deferErrors);

4282                 wasCData = false;

4283             }

4284         }

4285 

4286         mTokenState = TOKEN_FULL_COALESCED;

4287     }

4288 

    /**
     * Method called to read in consecutive beginning parts of a CDATA
     * segment, up to either end of the segment (]] and &gt;) or until
     * first 'hole' in text (buffer end, 2-char lf to convert).
     *<p>
     * When the method is called, it's expected that the first character
     * has been read as is in the current input buffer just before current
     * pointer.
     *<p>
     * Contents are never copied here: the text buffer is always set to
     * share the input buffer, and token state is updated to match.
     *
     * @param c First character in the CDATA segment (possibly part of end
     *   marker for empty segments)
     *
     * @return True if the whole CDATA segment was completely read; this
     *   happens only if the end marker is found within the current input
     *   buffer; false if it's possible that it wasn't read (ie.
     *   end-of-buffer or a linefeed that can not be handled in place
     *   was encountered).
     */
    private final boolean readCDataPrimary(char c)
        throws XMLStreamException
    {
        mWsStatus = (c <= CHAR_SPACE) ? ALL_WS_UNKNOWN : ALL_WS_NO;

        int ptr = mInputPtr;
        int inputLen = mInputEnd;
        char[] inputBuf = mInputBuffer;
        // First char was already consumed by caller, so segment starts one back
        int start = ptr-1;

        while (true) {
            if (c < CHAR_SPACE) {
                if (c == '\n') {
                    markLF(ptr);
                } else if (c == '\r') {
                    if (ptr >= inputLen) { // can't peek?
                        --ptr;
                        break;
                    }
                    if (mNormalizeLFs) { // can we do in-place Mac replacement?
                        if (inputBuf[ptr] == '\n') { // nope, 2 char lf
                            --ptr;
                            break;
                        }
                        inputBuf[ptr-1] = '\n'; // yup
                    } else {
                        // No LF normalization... can we just skip it?
                        if (inputBuf[ptr] == '\n') {
                            ++ptr;
                        }
                    }
                    markLF(ptr);
                } else if (c != '\t') {
                    throwInvalidSpace(c);
                }
            } else if (c == ']') {
                // Ok; need to get one or more ']'s, then '>'
                if ((ptr + 1) >= inputLen) { // not enough room? need to push it back
                    --ptr;
                    break;
                }

                // Needs to be followed by another ']'...
                if (inputBuf[ptr] == ']') {
                    ++ptr;
                    inner_loop:
                    while (true) {
                        if (ptr >= inputLen) {
                            /* Need to push back last 2 right brackets; it may
                             * be end marker divided by input buffer boundary
                             */
                            ptr -= 2;
                            break inner_loop;
                        }
                        c = inputBuf[ptr++];
                        if (c == '>') { // Ok, got it!
                            mInputPtr = ptr;
                            // Content length: everything from start, minus the 3-char end marker
                            ptr -= (start+3);
                            mTextBuffer.resetWithShared(inputBuf, start, ptr);
                            mTokenState = TOKEN_FULL_SINGLE;
                            return true;
                        }
                        if (c != ']') {
                            // Need to re-check this char (may be linefeed)
                            --ptr;
                            break inner_loop;
                        }
                        // Fall through to next round
                    }
                }
            }

            if (ptr >= inputLen) { // end-of-buffer?
                break;
            }
            c = inputBuf[ptr++];
        }

        mInputPtr = ptr;

        /* If we end up here, we either ran out of input, or hit something
         * which would leave 'holes' in buffer... fine, let's return then;
         * we can still update shared buffer copy: would be too early to
         * make a copy since caller may not even be interested in the
         * stuff.
         */
        int len = ptr - start;
        mTextBuffer.resetWithShared(inputBuf, start, len);
        // When coalescing, or with too short a segment so far, more has to be read
        if (mCfgCoalesceText ||
            (mTextBuffer.size() < mShortestTextSegment)) {
            mTokenState = TOKEN_STARTED;
        } else {
            mTokenState = TOKEN_PARTIAL_SINGLE;
        }
        return false;
    }

4401 

4402     /**

4403      * @return True if the whole CData section was completely read (we

4404      *   hit the end marker); false if a shorter segment was returned.

4405      */

4406     protected boolean readCDataSecondary(int shortestSegment)

4407         throws XMLStreamException

4408     {

4409         // Input pointers

4410         char[] inputBuf = mInputBuffer;

4411         int inputLen = mInputEnd;

4412         int inputPtr = mInputPtr;

4413 

4414         /* Output pointers; calls will also ensure that the buffer is

4415          * not shared, AND has room for one more char

4416          */

4417         char[] outBuf = mTextBuffer.getCurrentSegment();

4418         int outPtr = mTextBuffer.getCurrentSegmentSize();

4419 

4420         while (true) {

4421             if (inputPtr >= inputLen) {

4422                 loadMore(SUFFIX_IN_CDATA);

4423                 inputBuf = mInputBuffer;

4424                 inputPtr = mInputPtr;

4425                 inputLen = mInputEnd;

4426             }

4427             char c = inputBuf[inputPtr++];

4428 

4429             if (c < CHAR_SPACE) {

4430                 if (c == '\n') {

4431                     markLF(inputPtr);

4432                 } else if (c == '\r') {

4433                     mInputPtr = inputPtr;

4434                     if (skipCRLF(c)) { // got 2 char LF

4435                         if (!mNormalizeLFs) {

4436                             // Special handling, to output 2 chars at a time:

4437                             outBuf[outPtr++] = c;

4438                             if (outPtr >= outBuf.length) { // need more room?

4439                                 outBuf = mTextBuffer.finishCurrentSegment();

4440                                 outPtr = 0;

4441                             }

4442                         }

4443                         // And let's let default output the 2nd char, either way

4444                         c = '\n';

4445                     } else if (mNormalizeLFs) { // just \r, but need to convert

4446                         c = '\n'; // For Mac text

4447                     }

4448                     /* Since skipCRLF() needs to peek(), buffer may have

4449                      * changed, even if there was no CR+LF.

4450                      */

4451                     inputPtr = mInputPtr;

4452                     inputBuf = mInputBuffer;

4453                     inputLen = mInputEnd;

4454                 } else if (c != '\t') {

4455                     throwInvalidSpace(c);

4456                 }

4457             } else if (c == ']') {

4458                 // Ok; need to get ']>'

4459                 mInputPtr = inputPtr;

4460                 if (checkCDataEnd(outBuf, outPtr)) {

4461                     return true;

4462                 }

4463                 inputPtr = mInputPtr;

4464                 inputBuf = mInputBuffer;

4465                 inputLen = mInputEnd;

4466 

4467                 outBuf = mTextBuffer.getCurrentSegment();

4468                 outPtr = mTextBuffer.getCurrentSegmentSize();

4469                 continue; // need to re-process last (non-bracket) char

4470             }

4471 

4472             // Ok, let's add char to output:

4473             outBuf[outPtr++] = c;

4474 

4475             // Need more room?

4476             if (outPtr >= outBuf.length) {

4477                 TextBuffer tb = mTextBuffer;

4478                 // Perhaps we have now enough to return?

4479                 if (!mCfgCoalesceText) {

4480                     tb.setCurrentLength(outBuf.length);

4481                     if (tb.size() >= shortestSegment) {

4482                         mInputPtr = inputPtr;

4483                         return false;

4484                     }

4485                 }

4486                 // If not, need more buffer space:

4487                 outBuf = tb.finishCurrentSegment();

4488                 outPtr = 0;

4489                 // 17-Aug-2016, tatu: need to make sure to enforce size limits here too

4490                 verifyLimit("Text size", mConfig.getMaxTextLength(), mTextBuffer.size());

4491             }

4492         }

4493         // never gets here

4494     }

4495 

4496     /**

4497      * Method that will check, given the starting ']', whether there is

4498      * ending ']]>' (including optional extra ']'s); if so, will updated

4499      * output buffer with extra ]s, if not, will make sure input and output

4500      * are positioned for further checking.

4501      * 

4502      * @return True, if we hit the end marker; false if not.

4503      */

4504     private boolean checkCDataEnd(char[] outBuf, int outPtr)

4505         throws XMLStreamException

4506     {

4507         int bracketCount = 0;

4508         char c;

4509         do {

4510             ++bracketCount;

4511             c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

4512                 : getNextCharFromCurrent(SUFFIX_IN_CDATA);

4513         } while (c == ']');

4514 

4515         boolean match = (bracketCount >= 2 && c == '>');

4516         if (match) {

4517             bracketCount -= 2;

4518         }

4519         while (bracketCount > 0) {

4520             --bracketCount;

4521             outBuf[outPtr++] = ']';

4522             if (outPtr >= outBuf.length) {

4523                 /* Can't really easily return, even if we have enough

4524                  * stuff here, since we've more than one char...

4525                  */

4526                 outBuf = mTextBuffer.finishCurrentSegment();

4527                 outPtr = 0;

4528             }

4529         }

4530         mTextBuffer.setCurrentLength(outPtr);

4531         // Match? Can break, then:

4532         if (match) {

4533             return true;

4534         }

4535         // No match, need to push the last char back and admit defeat...

4536         --mInputPtr;

4537         return false;

4538     }

4539 

4540     /**

4541      * Method called to read in consecutive beginning parts of a text

4542      * segment, up to either end of the segment (lt char) or until

4543      * first 'hole' in text (buffer end, 2-char lf to convert, entity).

4544      *<p>

4545      * When the method is called, it's expected that the first character

4546      * has been read as is in the current input buffer just before current

4547      * pointer

4548      *

4549      * @param c First character of the text segment

4550      *

4551      * @return True if the whole text segment was completely read; this

4552      *   happens only if lt-char is hit; false if it's possible that

4553      *   it wasn't read (ie. end-of-buffer or entity encountered).

4554      */

4555     private final boolean readTextPrimary(char c) throws XMLStreamException

4556     {

4557         int ptr = mInputPtr;

4558         int start = ptr-1;

4559 

4560         // First: can we heuristically canonicalize ws used for indentation?

4561         if (c <= CHAR_SPACE) {

4562             int len = mInputEnd;

4563             /* Even without indentation removal, it's good idea to

4564              * 'convert' \r or \r\n into \n (by replacing or skipping first

4565              * char): this may allow reusing the buffer. 

4566              * But note that conversion MUST be enabled -- this is toggled

4567              * by code that includes internal entities, to prevent replacement

4568              * of CRs from int. general entities, as applicable.

4569              */

4570             do {

4571                 // We'll need at least one char, no matter what:

4572                 if (ptr < len && mNormalizeLFs) {

4573                     if (c == '\r') {

4574                         c = '\n';

4575                         if (mInputBuffer[ptr] == c) {

4576                             // Ok, whatever happens, can 'skip' \r, to point to following \n:

4577                             ++start;

4578                             // But if that's buffer end, can't skip that

4579                             if (++ptr >= len) {

4580                                 break;

4581                             }

4582                         } else {

4583                             mInputBuffer[start] = c;

4584                         }

4585                     } else if (c != '\n') {

4586                         break;

4587                     }

4588                     markLF(ptr);

4589                     if (mCheckIndentation > 0) {

4590                         ptr = readIndentation(c, ptr);

4591                         if (ptr < 0) { // success!

4592                             return true;

4593                         }

4594                     }

4595                     // If we got this far, we skipped a lf, need to read next char

4596                     c = mInputBuffer[ptr++];

4597                 }

4598             } while (false);

4599 

4600             // can we figure out indentation?

4601             mWsStatus = ALL_WS_UNKNOWN;

4602         } else {

4603             mWsStatus = ALL_WS_NO;

4604         }

4605         

4606         char[] inputBuf = mInputBuffer;

4607         int inputLen = mInputEnd;

4608 

4609         // Let's first see if we can just share input buffer:

4610         while (true) {

4611             if (c < CHAR_FIRST_PURE_TEXT) {

4612                 if (c == '<') {

4613                     mInputPtr = --ptr;

4614                     mTextBuffer.resetWithShared(inputBuf, start, ptr-start);

4615                     return true;

4616                 }

4617                 if (c < CHAR_SPACE) {

4618                     if (c == '\n') {

4619                         markLF(ptr);

4620                     } else if (c == '\r') {

4621                         if (ptr >= inputLen) { // can't peek?

4622                             --ptr;

4623                             break;

4624                         }

4625                         if (mNormalizeLFs) { // can we do in-place Mac replacement?

4626                             if (inputBuf[ptr] == '\n') { // nope, 2 char lf

4627                                 --ptr;

4628                                 break;

4629                             }

4630                             /* This would otherwise be risky (may modify value of

4631                              * a shared entity value), but since DTDs are cached/accessed

4632                              * based on properties including lf-normalization there's no

4633                              * harm in 'fixing' it in place.

4634                              */

4635                             inputBuf[ptr-1] = '\n'; // yup

4636                         } else {

4637                             // No LF normalization... can we just skip it?

4638                             if (inputBuf[ptr] == '\n') {

4639                                 ++ptr;

4640                             }

4641                         }

4642                         markLF(ptr);

4643                     } else if (c != '\t') {

4644                         // Should consume invalid char, but not include in result

4645                         mInputPtr = ptr;

4646                         mTextBuffer.resetWithShared(inputBuf, start, ptr-start-1);

4647                         /* Let's defer exception, provided we got at least

4648                          * one valid character (if not, better throw

4649                          * exception right away)

4650                          */

4651                         boolean deferErrors = (ptr - start) > 1;

4652                         mPendingException = throwInvalidSpace(c, deferErrors);

4653                         return true;

4654                     }

4655                 } else if (c == '&') {

4656                     // Let's push it back and break

4657                     --ptr;

4658                    break;

4659                 } else if (c == '>') {

4660                     // Let's see if we got ']]>'?

4661                     if ((ptr - start) >= 3) {

4662                         if (inputBuf[ptr-3] == ']' && inputBuf[ptr-2] == ']') {

4663                             /* Let's include ']]' in there, not '>' (since that

4664                              * makes it non-wellformed): but need to consume

4665                              * that char nonetheless

4666                              */

4667                             mInputPtr = ptr;

4668                             mTextBuffer.resetWithShared(inputBuf, start, ptr-start-1);

4669                             mPendingException = throwWfcException(ErrorConsts.ERR_BRACKET_IN_TEXT, true);

4670                             return true; // and we are fully done

4671                         }

4672                     }

4673                 }

4674             } // if (char in lower code range)

4675 

4676             if (ptr >= inputLen) { // end-of-buffer?

4677                 break;

4678             }

4679             c = inputBuf[ptr++];

4680         }

4681         mInputPtr = ptr;

4682 

4683         /* If we end up here, we either ran out of input, or hit something

4684          * which would leave 'holes' in buffer... fine, let's return then;

4685          * we can still update shared buffer copy: would be too early to

4686          * make a copy since caller may not even be interested in the

4687          * stuff.

4688          */

4689         mTextBuffer.resetWithShared(inputBuf, start, ptr - start);

4690         return false;

4691     }

4692 

4693     /**

4694      *

4695      * @param deferErrors Flag to enable storing an exception to a 

4696      *   variable, instead of immediately throwing it. If true, will

4697      *   just store the exception; if false, will not store, just throw.

4698      *

4699      * @return True if the text segment was completely read ({@code '<'} was hit,

4700      *   or in non-entity-expanding mode, a non-char entity); false if

4701      *   it may still continue

4702      */

4703     protected final boolean readTextSecondary(int shortestSegment, boolean deferErrors)

4704         throws XMLStreamException

4705     {

4706         /* Output pointers; calls will also ensure that the buffer is

4707          * not shared, AND has room for at least one more char

4708          */

4709         char[] outBuf = mTextBuffer.getCurrentSegment();

4710         int outPtr = mTextBuffer.getCurrentSegmentSize();

4711         int inputPtr = mInputPtr;

4712         char[] inputBuffer = mInputBuffer;

4713         int inputLen = mInputEnd;

4714 

4715         while (true) {

4716             if (inputPtr >= inputLen) {

4717                 /* 07-Oct-2005, TSa: Let's not throw an exception for EOF from

4718                  *   here -- in fragment mode, it shouldn't be thrown, and in

4719                  *   other modes we might as well first return text, and only

4720                  *   then throw an exception: no need to do that yet.

4721                  */

4722                 mInputPtr = inputPtr;

4723                 if (!loadMore()) {

4724                     break;

4725                 }

4726                 inputPtr = mInputPtr;

4727                 inputBuffer = mInputBuffer;

4728                 inputLen = mInputEnd;

4729             }

4730             char c = inputBuffer[inputPtr++];

4731 

4732             // Most common case is we don't have special char, thus:

4733             if (c < CHAR_FIRST_PURE_TEXT) {

4734                 if (c < CHAR_SPACE) {

4735                     if (c == '\n') {

4736                         markLF(inputPtr);

4737                     } else if (c == '\r') {

4738                         mInputPtr = inputPtr;

4739                         if (skipCRLF(c)) { // got 2 char LF

4740                             if (!mNormalizeLFs) {

4741                                 // Special handling, to output 2 chars at a time:

4742                                 outBuf[outPtr++] = c;

4743                                 if (outPtr >= outBuf.length) { // need more room?

4744                                     outBuf = mTextBuffer.finishCurrentSegment();

4745                                     outPtr = 0;

4746                                 }

4747                             }

4748                             // And let's let default output the 2nd char

4749                             c = '\n';

4750                         } else if (mNormalizeLFs) { // just \r, but need to convert

4751                             c = '\n'; // For Mac text

4752                         }

4753                         /* note: skipCRLF() may change ptr and len, but since

4754                          * it does not close input source, it won't change

4755                          * actual buffer object:

4756                          */

4757                         //inputBuffer = mInputBuffer;

4758                         inputLen = mInputEnd;

4759                         inputPtr = mInputPtr;

4760                     } else if (c != '\t') {

4761                         mTextBuffer.setCurrentLength(outPtr);

4762                         mInputPtr = inputPtr;

4763                         mPendingException = throwInvalidSpace(c, deferErrors);

4764                         break;

4765                     }

4766                 } else if (c == '<') { // end is nigh!

4767                     mInputPtr = inputPtr-1;

4768                     break;

4769                 } else if (c == '&') {

4770                     mInputPtr = inputPtr;

4771                     int ch;

4772                     if (mCfgReplaceEntities) { // can we expand all entities?

4773                         if ((inputLen - inputPtr) >= 3

4774                             && (ch = resolveSimpleEntity(true)) != 0) {

4775                             // Ok, it's fine then

4776                         } else {

4777                             ch = fullyResolveEntity(true);

4778                             if (ch == 0) {

4779                                 // Input buffer changed, nothing to output quite yet:

4780                                 inputBuffer = mInputBuffer;

4781                                 inputLen = mInputEnd;

4782                                 inputPtr = mInputPtr;

4783                                 continue;

4784                             }

4785                             // otherwise char is now fine...

4786                         }

4787                     } else {

4788                         /* Nope, can only expand char entities; others need

4789                          * to be separately handled.

4790                          */

4791                         ch = resolveCharOnlyEntity(true);

4792                         if (ch == 0) { // some other entity...

4793                             /* can't expand; underlying pointer now points to

4794                              * char after ampersand, need to rewind

4795                              */

4796                             --mInputPtr;

4797                             break;

4798                         }

4799                         // .. otherwise we got char we needed

4800                     }

4801                     if (ch <= 0xFFFF) {

4802                         c = (char) ch;

4803                     } else {

4804                         ch -= 0x10000;

4805                         // need more room?

4806                         if (outPtr >= outBuf.length) {

4807                             outBuf = mTextBuffer.finishCurrentSegment();

4808                             outPtr = 0;

4809                         }

4810                         outBuf[outPtr++] = (char) ((ch >> 10)  + 0xD800);

4811                         if (outPtr >= outBuf.length) {

4812                             if ((outBuf = _expandOutputForText(inputPtr, outBuf, Integer.MAX_VALUE)) == null) { // got enough, leave

4813                                 return false;

4814                             }

4815                             outPtr = 0;

4816                         }

4817                         c = (char) ((ch & 0x3FF)  + 0xDC00);

4818                     }

4819                     inputPtr = mInputPtr;

4820                     // not quite sure why this is needed... but it is:

4821                     inputLen = mInputEnd;

4822                 } else if (c == '>') {

4823                     // Let's see if we got ']]>'?

4824                     /* 21-Apr-2005, TSa: But we can NOT check the output buffer

4825                      *  as it contains _expanded_ stuff... only input side.

4826                      *  For now, 98% accuracy has to do, as we may not be able

4827                      *  to access previous buffer's contents. But at least we

4828                      *  won't produce false positives from entity expansion

4829                      */

4830                     if (inputPtr > 2) { // can we do it here?

4831                         // Since mInputPtr has been advanced, -1 refers to '>'

4832                         if (inputBuffer[inputPtr-3] == ']'

4833                             && inputBuffer[inputPtr-2] == ']') {

4834                             mInputPtr = inputPtr;

4835                             /* We have already added ']]' into output buffer...

4836                              * should be ok, since only with '>' does it become

4837                              * non-wellformed.

4838                              */

4839                             mTextBuffer.setCurrentLength(outPtr);

4840                             mPendingException = throwWfcException(ErrorConsts.ERR_BRACKET_IN_TEXT, deferErrors);

4841                             break;

4842                         }

4843                     } else {

4844                         /* 21-Apr-2005, TSa: No good way to verify it,

4845                          *   at this point. Should come back and think of how

4846                          *   to properly handle this (rare) possibility.

4847                          */

4848                         ;

4849                     }

4850                 }

4851             }

4852             // Ok, let's add char to output:

4853             outBuf[outPtr++] = c;

4854 

4855             // Need more room?

4856             if (outPtr >= outBuf.length) {

4857                 if ((outBuf = _expandOutputForText(inputPtr, outBuf, shortestSegment)) == null) { // got enough, leave

4858                     return false;

4859                 }

4860                 verifyLimit("Text size", mConfig.getMaxTextLength(), mTextBuffer.size());

4861                 outPtr = 0;

4862             }

4863         }

4864         mTextBuffer.setCurrentLength(outPtr);

4865         return true;

4866     }

4867 

4868     private final char[] _expandOutputForText(int inputPtr, char[] outBuf,

4869             int shortestSegment)

4870     {

4871         TextBuffer tb = mTextBuffer;

4872         // Perhaps we have now enough to return?

4873         tb.setCurrentLength(outBuf.length);

4874         if (tb.size() >= shortestSegment) {

4875             mInputPtr = inputPtr;

4876             return null;

4877         }

4878         // If not, need more buffer space:

4879         return tb.finishCurrentSegment();

4880     }

4881     

4882     /**

4883      * Method called to try to parse and canonicalize white space that

4884      * has a good chance of being white space with somewhat regular

4885      * structure; specifically, something that looks like typical

4886      * indentation.

4887      *<p>

4888      * Note: Caller guarantees that there will be at least 2 characters

4889      * available in the input buffer. And method has to ensure that if

4890      * it does not find a match, it will return pointer value such

4891      * that there is at least one valid character remaining.

4892      *

4893      * @return -1, if the content was determined to be canonicalizable

4894      *    (indentation) white space; and thus fully parsed. Otherwise

4895      *    pointer (value to set to mInputPtr) to the next character

4896      *    to process (not processed by this method)

4897      */

4898     private final int readIndentation(char c, int ptr)

4899         throws XMLStreamException

4900     {

4901         /* We need to verify that:

4902          * (a) we can read enough contiguous data to do determination

4903          * (b) sequence is a linefeed, with either zero or more following

4904          *    spaces, or zero or more tabs; and followed by non-directive

4905          *    tag (start/end tag)

4906          * and if so, we can use a canonical shared representation of

4907          * this even.

4908          */

4909         final int inputLen = mInputEnd;

4910         final char[] inputBuf = mInputBuffer;

4911         int start = ptr-1;

4912         final char lf = c;

4913 

4914         // Note: caller guarantees at least one more char in the input buffer

4915         ws_loop:

4916         do { // dummy loop to allow for break (which indicates failure)

4917             c = inputBuf[ptr++];

4918             if (c == ' ' || c == '\t') { // indentation?

4919                 // Need to limit to maximum

4920                 int lastIndCharPos = (c == ' ') ? TextBuffer.MAX_INDENT_SPACES : TextBuffer.MAX_INDENT_TABS;

4921                 lastIndCharPos += ptr;

4922                 if (lastIndCharPos > inputLen) {

4923                     lastIndCharPos = inputLen;

4924                 }

4925 

4926                 inner_loop:

4927                 while (true) {

4928                     if (ptr >= lastIndCharPos) { // overflow; let's backtrack

4929                         --ptr;

4930                         break ws_loop;

4931                     }

4932                     char d = inputBuf[ptr++];

4933                     if (d != c) {

4934                         if (d == '<') { // yup, got it!

4935                             break inner_loop;

4936                         }

4937                         --ptr; // caller needs to reprocess it

4938                         break ws_loop; // nope, blew it

4939                     }

4940                 }

4941                 // This means we had success case; let's fall through

4942             } else if (c != '<') { // nope, can not be

4943                 --ptr; // simpler if we just push it back; needs to be processed later on

4944                 break ws_loop;

4945             }

4946 

4947             // Ok; we got '<'... just need any other char than '!'...

4948             if (ptr < inputLen && inputBuf[ptr] != '!') {

4949                 // Voila!

4950                 mInputPtr = --ptr; // need to push back that '<' too

4951                 mTextBuffer.resetWithIndentation(ptr - start - 1, c);

4952                 // One more thing: had a positive match, need to note it

4953                 if (mCheckIndentation < INDENT_CHECK_MAX) {

4954                     mCheckIndentation += INDENT_CHECK_START;

4955                 }

4956                 mWsStatus = ALL_WS_YES;

4957                 return -1;

4958             }

4959             // Nope: need to push '<' back, then

4960             --ptr;

4961         } while (false);

4962 

4963         // Ok, nope... caller can/need to take care of it:

4964         /* Also, we may need to subtract indentation check count to possibly

4965          * disable this check if it doesn't seem to work.

4966          */

4967         --mCheckIndentation;

4968         /* Also; if lf we got was \r, need to convert it now (this

4969          * method only gets called in lf converting mode)

4970          * (and yes, it is safe to modify input buffer at this point;

4971          * see calling method for details)

4972          */

4973         if (lf == '\r') {

4974             inputBuf[start] = '\n';

4975         }

4976         return ptr;

4977     }

4978 

4979     /**

4980      * Reading whitespace should be very similar to reading normal text;

4981      * although couple of simplifications can be made. Further, since this

4982      * method is very unlikely to be of much performance concern, some

4983      * optimizations are left out, where it simplifies code.

4984      *

4985      * @param c First white space characters; known to contain white space

4986      *   at this point

4987      * @param prologWS If true, is reading white space outside XML tree,

4988      *   and as such can get EOF. If false, should not get EOF, nor be

4989      *   followed by any other char than &lt;

4990      *

4991      * @return True if the whole white space segment was read; false if

4992      *   something prevented that (end of buffer, replaceable 2-char lf)

4993      */

4994     private final boolean readSpacePrimary(char c, boolean prologWS)

4995         throws XMLStreamException

4996     {

4997         int ptr = mInputPtr;

4998         char[] inputBuf = mInputBuffer;

4999         int inputLen = mInputEnd;

5000         int start = ptr-1;

5001 

5002         // Let's first see if we can just share input buffer:

5003         while (true) {

5004             /* 30-Aug-2006, TSa: Let's not check for validity errors yet,

5005              * even if we could detect problems at this point.

5006              * This because it's not always

5007              * an error (in dtd-aware, non-validating mode); but also since

5008              * that way we can first return all space we got, and only

5009              * indicate error when next token is to be accessed.

5010              */

5011             if (c > CHAR_SPACE) { // End of whitespace

5012                 mInputPtr = --ptr;

5013                 mTextBuffer.resetWithShared(mInputBuffer, start, ptr-start);

5014                 return true;

5015             }

5016 

5017             if (c == '\n') {

5018                 markLF(ptr);

5019             } else if (c == '\r') {

5020                 if (ptr >= mInputEnd) { // can't peek?

5021                     --ptr;

5022                     break;

5023                 }

5024                 if (mNormalizeLFs) { // can we do in-place Mac replacement?

5025                     if (inputBuf[ptr] == '\n') { // nope, 2 char lf

5026                         --ptr;

5027                         break;

5028                     }

5029                     inputBuf[ptr-1] = '\n'; // yup

5030                 } else {

5031                     // No LF normalization... can we just skip it?

5032                     if (inputBuf[ptr] == '\n') {

5033                         ++ptr;

5034                     }

5035                 }

5036                 markLF(ptr);

5037             } else if (c != CHAR_SPACE && c != '\t') {

5038                 throwInvalidSpace(c);

5039             }

5040             if (ptr >= inputLen) { // end-of-buffer?

5041                 break;

5042             }

5043             c = inputBuf[ptr++];

5044         }

5045 

5046         mInputPtr = ptr;

5047         

5048         /* Ok, couldn't read it completely, let's just return whatever

5049          * we did get as shared data

5050          */

5051         mTextBuffer.resetWithShared(inputBuf, start, ptr - start);

5052         return false;

5053     }

5054 

5055     /**

5056      * This is very similar to readSecondaryText(); called when we need

5057      * to read in rest of (ignorable) white space segment.

5058      *

5059      * @param prologWS True if the ignorable white space is within prolog

5060      *   (or epilog); false if it's within xml tree.

5061      */

5062     private void readSpaceSecondary(boolean prologWS)

5063         throws XMLStreamException

5064     {

5065         /* Let's not bother optimizing input. However, we can easily optimize

5066          * output, since it's easy to do, yet has more effect on performance

5067          * than localizing input variables.

5068          */

5069         char[] outBuf = mTextBuffer.getCurrentSegment();

5070         int outPtr = mTextBuffer.getCurrentSegmentSize();

5071 

5072         while (true) {

5073             if (mInputPtr >= mInputEnd) {

5074                 /* 07-Oct-2005, TSa: Let's not throw an exception yet --

5075                  *   can return SPACE, and let exception be thrown

5076                  *   when trying to fetch next event.

5077                  */

5078                 if (!loadMore()) {

5079                     break;

5080                 }

5081             }

5082             char c = mInputBuffer[mInputPtr];

5083             if (c > CHAR_SPACE) { // end of WS?

5084                 break;

5085             }

5086             ++mInputPtr;

5087             if (c == '\n') {

5088                 markLF();

5089             } else if (c == '\r') {

5090                 if (skipCRLF(c)) {

5091                     if (!mNormalizeLFs) {

5092                         // Special handling, to output 2 chars at a time:

5093                         outBuf[outPtr++] = c;

5094                         if (outPtr >= outBuf.length) { // need more room?

5095                             outBuf = mTextBuffer.finishCurrentSegment();

5096                             outPtr = 0;

5097                         }

5098                     }

5099                     c = '\n';

5100                 } else if (mNormalizeLFs) {

5101                     c = '\n'; // For Mac text

5102                 }

5103             } else if (c != CHAR_SPACE && c != '\t') {

5104                 throwInvalidSpace(c);

5105             }

5106                 

5107             // Ok, let's add char to output:

5108             outBuf[outPtr++] = c;

5109 

5110             // Need more room?

5111             if (outPtr >= outBuf.length) {

5112                 outBuf = mTextBuffer.finishCurrentSegment();

5113                 outPtr = 0;

5114             }

5115         }

5116         mTextBuffer.setCurrentLength(outPtr);

5117     }

5118 

5119     /**

5120      * Method called to read the contents of the current CHARACTERS

5121      * event, and write all contents using the specified Writer.

5122      *

5123      * @param w Writer to use for writing out textual content parsed

5124      *

5125      * @return Total number of characters written using the writer

5126      */

5127     private int readAndWriteText(Writer w)

5128         throws IOException, XMLStreamException

5129     {

5130         mTokenState = TOKEN_FULL_SINGLE; // we'll read it all

5131 

5132         /* We should be able to mostly just use the input buffer at this

5133          * point; exceptions being two-char linefeeds (when converting

5134          * to single ones) and entities (which likewise can expand or

5135          * shrink), both of which require flushing and/or single byte

5136          * output.

5137          */

5138         int start = mInputPtr;

5139         int count = 0;

5140 

5141         main_loop:

5142         while (true) {

5143             char c;

5144             // Reached the end of buffer? Need to flush, then

5145             if (mInputPtr >= mInputEnd) {

5146                 int len = mInputPtr - start;

5147                 if (len > 0) {

5148                     w.write(mInputBuffer, start, len);

5149                     count += len;

5150                 }

5151                 c = getNextChar(SUFFIX_IN_TEXT);

5152                 start = mInputPtr-1; // needs to be prior to char we got

5153             } else {

5154                 c = mInputBuffer[mInputPtr++];

5155             }

5156             // Most common case is we don't have a special char, thus:

5157             if (c < CHAR_FIRST_PURE_TEXT) {

5158                 if (c < CHAR_SPACE) {

5159                     if (c == '\n') {

5160                         markLF();

5161                     } else if (c == '\r') {

5162                         char d;

5163                         final boolean atBoundary = (mInputPtr >= mInputEnd);

5164                         if (atBoundary) {

5165                             // If we can't peek easily, let's flush past stuff and load

5166                             // more... (have to flush, since new read will overwrite input buffers)

5167                             // 06-Dec-2019, tatu: [woodstox-core#97] Need to avoid copying \r tho:

5168                             int len = mInputPtr - start - 1;

5169                             if (len > 0) {

5170                                 w.write(mInputBuffer, start, len);

5171                                 count += len;

5172                             }

5173                             d = getNextChar(SUFFIX_IN_TEXT);

5174                             start = mInputPtr; // to mark 'no past content'

5175                         } else {

5176                             d = mInputBuffer[mInputPtr++];

5177                         }

5178                         if (d == '\n') {

5179                             if (mNormalizeLFs) {

5180                                 // Let's flush content prior to 2-char LF, and start the new

5181                                 // segment on the second char... this way, no mods are needed

5182                                 // for the buffer, AND it'll also work on split 2-char lf!

5183                                 int len = mInputPtr - start - 2;

5184                                 if (len > 0) {

5185                                     w.write(mInputBuffer, start, len);

5186                                     count += len;

5187                                 }

5188                                 start = mInputPtr-1; // so '\n' is the first char

5189                             } else {

5190                                 // otherwise it's good as is... almost

5191                                 if (atBoundary) { // except, we don't want to lose that \r!

5192                                     w.write(c);

5193                                 }

5194                             }

5195                         } else { // not 2-char... need to replace?

5196                             // First: push back whatever non-linefeed we got:

5197                             --mInputPtr;

5198                             // 06-Dec-2019, tatu: But beware [woodstox-core#97]

5199                             if (atBoundary) {

5200                                 // If at boundary, no room to replace; must write single lf char

5201                                 w.write(mNormalizeLFs ? '\n' : c);

5202                                 ++count;

5203                             } else { // but if not at boundary, can just replace lone '\r' if need be

5204                                 if (mNormalizeLFs) { // replace \r with \n

5205                                     mInputBuffer[mInputPtr-1] = '\n';

5206                                 }

5207                             }

5208                         }

5209                         markLF();

5210                     } else if (c != '\t') {

5211                         throwInvalidSpace(c);

5212                     }

5213                 } else if (c == '<') { // end is nigh!

5214                     break main_loop;

5215                 } else if (c == '&') {

5216                     // Have to flush all stuff, since entities pretty much

5217                     // force it; input buffer won't be contiguous

5218                     int len = mInputPtr - 1 - start; // -1 to remove ampersand

5219                     if (len > 0) {

5220                         w.write(mInputBuffer, start, len);

5221                         count += len;

5222                     }

5223                     int ch;

5224                     if (mCfgReplaceEntities) { // can we expand all entities?

5225                         if ((mInputEnd - mInputPtr) < 3

5226                             || (ch = resolveSimpleEntity(true)) == 0) {

5227                             ch = fullyResolveEntity(true);

5228                         }

5229                     } else {

5230                         ch = resolveCharOnlyEntity(true);

5231                         if (ch == 0) { // some other entity...

5232                             /* can't expand, so, let's just bail out... but

5233                              * let's also ensure no text is added twice, as

5234                              * all prev text was just flushed, but resolve

5235                              * may have moved input buffer around.

5236                              */

5237                             start = mInputPtr;

5238                             break main_loop;

5239                         }

5240                     }

5241                     if (ch != 0) {

5242                         if (ch <= 0xFFFF) {

5243                             c = (char) ch;

5244                         } else {

5245                             ch -= 0x10000;

5246                             w.write((char) ((ch >> 10)  + 0xD800));

5247                             c = (char) ((ch & 0x3FF)  + 0xDC00);

5248                         }

5249                         w.write(c);

5250                         ++count;

5251                     }

5252                     start = mInputPtr;

5253                 } else if (c == '>') { // did we get ']]>'?

5254                     /* 21-Apr-2005, TSa: But we can NOT check the output buffer

5255                      *  (see comments in readTextSecondary() for details)

5256                      */

5257                     if (mInputPtr >= 2) { // can we do it here?

5258                         if (mInputBuffer[mInputPtr-2] == ']'

5259                             && mInputBuffer[mInputPtr-1] == ']') {

5260                             // Anything to flush?

5261                             int len = mInputPtr - start;

5262                             if (len > 0) {

5263                                 w.write(mInputBuffer, start, len);

5264                             }

5265                             throwParseError(ErrorConsts.ERR_BRACKET_IN_TEXT);

5266                         }

5267                     } else {

5268                         ; // !!! TBI: how to check past boundary?

5269                     }

5270                 } else if (c == CHAR_NULL) {

5271                     throwNullChar();

5272                 }

5273             }

5274         } // while (true)

5275 

5276         /* Need to push back '<' or '&', whichever caused us to

5277          * get out...

5278          */

5279         --mInputPtr;

5280 

5281         // Anything left to flush?

5282         int len = mInputPtr - start;

5283         if (len > 0) {

5284             w.write(mInputBuffer, start, len);

5285             count += len;

5286         }

5287         return count;

5288     }

5289 

5290     /**

5291      * Method called to read the contents of the current (possibly partially

5292      * read) CDATA

5293      * event, and write all contents using the specified Writer.

5294      *

5295      * @param w Writer to use for writing out textual content parsed

5296      *

5297      * @return Total number of characters written using the writer for

5298      *   the current CDATA event

5299      */

5300     private int readAndWriteCData(Writer w)

5301         throws IOException, XMLStreamException

5302     {

5303         mTokenState = TOKEN_FULL_SINGLE; // we'll read it all

5304 

5305         /* Ok; here we can basically have 2 modes; first the big loop to

5306          * gather all data up until a ']'; and then another loop to see

5307          * if ']' is part of ']]>', and after this if no end marker found,

5308          * go back to the first part.

5309          */

5310         char c = (mInputPtr < mInputEnd) ?

5311             mInputBuffer[mInputPtr++] : getNextChar(SUFFIX_IN_CDATA);

5312         int count = 0;

5313 

5314         main_loop:

5315         while (true) {

5316             int start = mInputPtr-1;

5317 

5318             quick_loop:

5319             while (true) {

5320                 if (c > CHAR_CR_LF_OR_NULL) {

5321                     if (c == ']') {

5322                         break quick_loop;

5323                     }

5324                 } else {

5325                     if (c < CHAR_SPACE) {

5326                         if (c == '\n') {

5327                             markLF();

5328                         } else if (c == '\r') {

5329                             char d;

5330                             if (mInputPtr >= mInputEnd) {

5331                                 /* If we can't peek easily, let's flush past stuff

5332                                  * and load more... (have to flush, since new read

5333                                  * will overwrite inbut buffers)

5334                                  */

5335                                 int len = mInputPtr - start;

5336                                 if (len > 0) {

5337                                     w.write(mInputBuffer, start, len);

5338                                     count += len;

5339                                 }

5340                                 d = getNextChar(SUFFIX_IN_CDATA);

5341                                 start = mInputPtr; // to mark 'no past content'

5342                             } else {

5343                                 d = mInputBuffer[mInputPtr++];

5344                             }

5345                             if (d == '\n') {

5346                                 if (mNormalizeLFs) {

5347                                     /* Let's flush content prior to 2-char LF, and

5348                                      * start the new segment on the second char...

5349                                      * this way, no mods are needed for the buffer,

5350                                      * AND it'll also  work on split 2-char lf!

5351                                      */

5352                                     int len = mInputPtr - 2 - start;

5353                                     if (len > 0) {

5354                                         w.write(mInputBuffer, start, len);

5355                                         count += len;

5356                                     }

5357                                     start = mInputPtr-1; // so '\n' is the first char

5358                                 } else {

5359                                     // otherwise it's good as is

5360                                 }

5361                             } else { // not 2-char... need to replace?

5362                                 --mInputPtr;

5363                                 if (mNormalizeLFs) {

5364                                     mInputBuffer[mInputPtr-1] = '\n';

5365                                 }

5366                             }

5367                             markLF();

5368                         } else if (c != '\t') {

5369                             throwInvalidSpace(c);

5370                         }

5371                     }

5372                 }

5373                 // Reached the end of buffer? Need to flush, then

5374                 if (mInputPtr >= mInputEnd) {

5375                     int len = mInputPtr - start;

5376                     if (len > 0) {

5377                         w.write(mInputBuffer, start, len);

5378                         count += len;

5379                     }

5380                     start = 0;

5381                     c = getNextChar(SUFFIX_IN_CDATA);

5382                 } else {

5383                     c = mInputBuffer[mInputPtr++];

5384                 }

5385             } // while (true)

5386 

5387             // Anything to flush once we hit ']'?

5388             {

5389                 /* -1 since the last char in there (a '[') is NOT to be

5390                  * output at this point

5391                  */

5392                 int len = mInputPtr - start - 1;

5393                 if (len > 0) {

5394                     w.write(mInputBuffer, start, len);

5395                     count += len;

5396                 }

5397             }

5398 

5399             /* Ok; we only get this far when we hit a ']'. We got one,

5400              * so let's see if we can find at least one more bracket,

5401              * immediately followed by '>'...

5402              */

5403             int bracketCount = 0;

5404             do {

5405                 ++bracketCount;

5406                 c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++]

5407                     : getNextCharFromCurrent(SUFFIX_IN_CDATA);

5408             } while (c == ']');

5409 

5410             boolean match = (bracketCount >= 2 && c == '>');

5411             if (match) {

5412                 bracketCount -= 2;

5413             }

5414             while (bracketCount > 0) {

5415                 --bracketCount;

5416                 w.write(']');

5417                 ++count;

5418             }

5419             if (match) {

5420                 break main_loop;

5421             }

5422             /* Otherwise we'll just loop; now c is properly set to be

5423              * the next char as well.

5424              */

5425         } // while (true)

5426 

5427         return count;

5428     }

5429 

5430     /**

5431      * @return Number of characters written to Writer during the call

5432      */

5433     private int readAndWriteCoalesced(Writer w, boolean wasCData)

5434         throws IOException, XMLStreamException

5435     {

5436         mTokenState = TOKEN_FULL_COALESCED;

5437         int count = 0;

5438 

5439         /* Ok, so what do we have next? CDATA, CHARACTERS, or something

5440          * else?

5441          */

5442         main_loop:

5443         while (true) {

5444             if (mInputPtr >= mInputEnd) {

5445                 if (!loadMore()) {

5446                     /* Shouldn't normally happen, but let's just let

5447                      * caller deal with it...

5448                      */

5449                     break main_loop;

5450                 }

5451             }

5452             // Let's peek, ie. not advance it yet

5453             char c = mInputBuffer[mInputPtr];

5454             if (c == '<') { // CDATA, maybe?

5455                 // Need to distinguish "<![" from other tags/directives

5456                 if ((mInputEnd - mInputPtr) < 3) {

5457                     if (!ensureInput(3)) { // likewise, probably an error...

5458                         break main_loop;

5459                     }

5460                 }

5461                 if (mInputBuffer[mInputPtr+1] != '!'

5462                     || mInputBuffer[mInputPtr+2] != '[') {

5463                     // Nah, some other tag or directive

5464                     break main_loop;

5465                 }

5466                 // Let's skip beginning parts, then:

5467                 mInputPtr += 3;

5468                 // And verify we get proper CDATA directive

5469                 checkCData();

5470                 // cool, let's just handle it then

5471                 count += readAndWriteCData(w);

5472                 wasCData = true;

5473             } else { // text

5474                 /* Did we hit an 'unexpandable' entity? If so, need to

5475                  * just bail out (only happens when Coalescing AND not

5476                  * expanding -- a rather unlikely combination)

5477                  */

5478                 if (c == '&' && !wasCData) {

5479                     break;

5480                 }

5481                 count += readAndWriteText(w);

5482                 wasCData = false;

5483             }

5484         }

5485 

5486         return count;

5487     }

5488 

5489     /*

5490     ///////////////////////////////////////////////////////////////////////

5491     // Internal methods, low-level input access

5492     ///////////////////////////////////////////////////////////////////////

5493      */

5494     

5495     /**

5496      * Method that will skip any white space from input source(s)

5497      *

5498      * @return true If at least one white space was skipped; false

5499      *   if not (character passed was not white space)

5500      */

5501     protected final boolean skipWS(char c) 

5502         throws XMLStreamException

5503     {

5504         if (c > CHAR_SPACE) {

5505             return false;

5506         }

5507         while (true) {

5508             // Linefeed?

5509             if (c == '\n' || c == '\r') {

5510                 skipCRLF(c);

5511             } else if (c != CHAR_SPACE && c != '\t') {

5512                 throwInvalidSpace(c);

5513             }

5514             if (mInputPtr >= mInputEnd) {

5515                 // Let's see if current source has more

5516                 if (!loadMoreFromCurrent()) {

5517                     return true;

5518                 }

5519             }

5520             c = mInputBuffer[mInputPtr];

5521             if (c > CHAR_SPACE) { // not WS? Need to return

5522                 return true;

5523             }

5524             ++mInputPtr;

5525         }

5526     }

5527 

5528     /*

5529     ///////////////////////////////////////////////////////////////////////

5530     // Abstract method implementations

5531     ///////////////////////////////////////////////////////////////////////

5532      */

5533 

5534     @Override

5535     protected EntityDecl findEntity(String id, Object arg)

5536         throws XMLStreamException

5537     {

5538         EntityDecl ed = mConfig.findCustomInternalEntity(id);

5539         if (ed == null && mGeneralEntities != null) {

5540             ed = mGeneralEntities.get(id);

5541         }

5542         /* 05-Mar-2006, TSa: Externally declared entities are illegal

5543          *   if we were declared as "standalone='yes'"...

5544          */

5545         if (mDocStandalone == DOC_STANDALONE_YES) {

5546             if (ed != null && ed.wasDeclaredExternally()) {

5547                 throwParseError(ErrorConsts.ERR_WF_ENTITY_EXT_DECLARED, ed.getName(), null);

5548             }

5549         }

5550         return ed;

5551     }

5552 

5553     @Override

5554     protected void handleUndeclaredEntity(String id)

5555         throws XMLStreamException

5556     {

5557         throwParseError(((mDocStandalone == DOC_STANDALONE_YES) ?

5558                         ErrorConsts.ERR_WF_GE_UNDECLARED_SA :

5559                         ErrorConsts.ERR_WF_GE_UNDECLARED),

5560                         id, null);

5561     }

5562 

5563     @Override

5564     protected void handleIncompleteEntityProblem(WstxInputSource closing)

5565         throws XMLStreamException

5566     {

5567         String top = mElementStack.isEmpty() ? "[ROOT]" : mElementStack.getTopElementDesc();

5568         throwParseError("Unexpected end of entity expansion for entity &{0}; was expecting a close tag for element <{1}>",

5569                         closing.getEntityId(), top);

5570     }

5571 

5572     /*

5573     ///////////////////////////////////////////////////////////////////////

5574     // Internal methods, validation, error handling and reporting

5575     ///////////////////////////////////////////////////////////////////////

5576      */

5577 

5578     /**

5579      * This problem gets reported if an entity tries to expand to

5580      * a close tag matching start tag that did not came from the same

5581      * entity (but from parent).

5582      */

5583     protected void handleGreedyEntityProblem(WstxInputSource input)

5584         throws XMLStreamException

5585     {

5586         String top = mElementStack.isEmpty() ? "[ROOT]" : mElementStack.getTopElementDesc();

5587         throwParseError("Improper GE/element nesting: entity &"

5588                         +input.getEntityId()+" contains closing tag for <"+top+">");

5589     }

5590 

5591     private void throwNotTextual(int type) {

5592         throw new IllegalStateException("Not a textual event ("

5593                 +tokenTypeDesc(type)+")");

5594     }

5595 

5596     private void throwNotTextXxx(int type) {

5597         throw new IllegalStateException("getTextXxx() methods can not be called on "

5598                 +tokenTypeDesc(type));

5599     }

5600 

5601     protected void throwNotTextualOrElem(int type) {

5602         throw new IllegalStateException(MessageFormat.format(ErrorConsts.ERR_STATE_NOT_ELEM_OR_TEXT,

5603                 new Object[] { tokenTypeDesc(type) }));

5604     }

5605 

5606     /**

5607      * Method called when we get an EOF within content tree

5608      */

5609     protected void throwUnexpectedEOF() throws WstxException {

5610         throwUnexpectedEOF("; was expecting a close tag for element <"+mElementStack.getTopElementDesc()+">");

5611     }

5612 

5613     /**

5614      * Method called to report a problem with 

5615      */

5616     protected XMLStreamException _constructUnexpectedInTyped(int nextToken) {

5617         if (nextToken == START_ELEMENT) {

5618             return _constructTypeException("Element content can not contain child START_ELEMENT when using Typed Access methods", null);

5619         }

5620         return _constructTypeException("Expected a text token, got "+tokenTypeDesc(nextToken), null);

5621     }

5622 

5623     protected TypedXMLStreamException _constructTypeException(String msg, String lexicalValue) {

5624         return new TypedXMLStreamException(lexicalValue, msg, getStartLocation());

5625     }

5626 

5627     /**

5628      * Stub method implemented by validating parsers, to report content

5629      * that's not valid for current element context. Defined at this

5630      * level since some such problems need to be caught at low-level;

5631      * however, details of error reports are not needed here.

5632      * 

5633      * @param evtType Type of event that contained unexpected content

5634      */

5635     protected void reportInvalidContent(int evtType) throws XMLStreamException {

5636         // should never happen; sub-class has to override:

5637         throwParseError("Internal error: sub-class should override method");

5638     }

5639 }

5640