1 package com.ctc.wstx.api;
2
3 import javax.xml.stream.XMLResolver;
4
5 import org.codehaus.stax2.XMLInputFactory2;
6
7 /**
8 * Class that contains constant for property names used to configure
9 * cursor and event readers produced by Wstx implementation of
10 * {@link javax.xml.stream.XMLInputFactory}.
11 *<p>
12 * TODO:
13 *
14 * - CHECK_CHAR_VALIDITY (separate for white spaces?)
15 * - CATALOG_RESOLVER? (or at least, ENABLE_CATALOGS)
16 */
17 public final class WstxInputProperties
18 {
19 /**
20 * Constants used when no DTD handling is done, and we do not know the
21 * 'real' type of an attribute. Seems like CDATA is the safe choice.
22 */
23 public final static String UNKNOWN_ATTR_TYPE = "CDATA";
24
25 /*
26 ///////////////////////////////////////////////////////////////////////
27 // Simple on/off settings:
28 ///////////////////////////////////////////////////////////////////////
29 */
30
31 // // // Normalization:
32
33 /**
34 * Feature that controls whether linefeeds are normalized into
35 * canonical linefeed as mandated by xml specification.
36 *<p>
37 * Note that disabling this property (from its default enabled
38 * state) will result in non-conforming XML processing. It may
39 * be useful for use cases where changes to input content should
40 * be minimized.
41 *<p>
42 * Note: this property was initially removed from Woodstox 4.0,
43 * but was reintroduced in 4.0.8 due to user request.
44 */
45 public final static String P_NORMALIZE_LFS = "com.ctc.wstx.normalizeLFs";
46
47 //public final static String P_NORMALIZE_ATTR_VALUES = "com.ctc.wstx.normalizeAttrValues";
48
49 // // // XML character validation:
50
51 /**
52 * Whether readers will verify that characters in text content are fully
53 * valid XML characters (not just Unicode). If true, will check
54 * that they are valid (including white space); if false, will not
55 * check.
56 *<p>
57 * Note that this property will NOT have effect on all encoding problems,
58 * specifically:
59 *<ul>
60 * <li>UTF-8 decoder will still report invalid UTF-8 byte sequences (and same
61 * for other character encodings).
62 * </li>
63 * <li>XML Name character rules follow separate validation which will not be affected
64 * </li>
65 *</ul>
66 *<p>
67 * Turning this option off may improve parsing performance; leaving
68 * it on guarantees compatibility with XML 1.0 specification regarding character
69 * validity rules.
70 */
71 public final static String P_VALIDATE_TEXT_CHARS = "com.ctc.wstx.validateTextChars";
72
73
74 /**
75 * Allow all XML 1.1 characters escapes even if input document is described as XML 1.0
76 * (in addition to ones allowed in 1.l0).
77 *<p>
78 * Since this is non-standard option (that is, deviates from XML specification), it is
79 * disabled by default.
80 *
81 * @since 5.2
82 */
83 public final static String P_ALLOW_XML11_ESCAPED_CHARS_IN_XML10 = "com.ctc.wstx.allowXml11EscapedCharsInXml10";
84
85 // // // Caching:
86
87 /**
88 * Whether readers will try to cache parsed external DTD subsets or not.
89 */
90
91 public final static String P_CACHE_DTDS = "com.ctc.wstx.cacheDTDs";
92
93 /**
94 * Whether reader is to cache DTDs (when caching enabled) based on public id
95 * or not: if not, system id will be primarily used. Although theoretically
96 * public IDs should be unique, and should be good caching keys, sometimes
97 * broken documents use 'wrong' public IDs, and such by default caching keys
98 * are based on system id only.
99 */
100 public final static String P_CACHE_DTDS_BY_PUBLIC_ID = "com.ctc.wstx.cacheDTDsByPublicId";
101
102
103 // // // Enabling/disabling lazy/incomplete parsing
104
105 /**
106 * Whether stream readers are allowed to do lazy parsing, meaning
107 * to parse minimal part of the event when
108 * {@link javax.xml.stream.XMLStreamReader#next} is called, and only parse the rest
109 * as needed (or skip remainder of no extra information is needed).
110 * Alternative to lazy parsing is called "eager parsing", and is
111 * what most xml parsers use by default.
112 *<p>
113 * Enabling lazy parsing can improve performance for tasks where
114 * number of textual events are skipped. The downside is that
115 * not all well-formedness problems are reported when
116 * {@link javax.xml.stream.XMLStreamReader#next} is called, but only when the
117 * rest of event are read or skipped.
118 *<p>
119 * Default value for Woodstox is such that lazy parsing is
120 * enabled.
121 *
122 * @deprecated As of Woodstox 4.0 use
123 * {@link XMLInputFactory2#P_LAZY_PARSING} instead (from
124 * Stax2 extension API, v3.0)
125 */
126 @Deprecated
127 public final static String P_LAZY_PARSING = XMLInputFactory2.P_LAZY_PARSING;
128
129 // // // API behavior (for backwards compatibility)
130
131 /**
132 * This read-only property indicates whether null is returned for default name space prefix;
133 * Boolean.TRUE indicates it does, Boolean.FALSE that it does not.
134 *<p>
135 * Default value for 4.1 is 'false'; this will most likely change for 5.0 since
136 * Stax API actually specifies null to be used.
137 *
138 * @since 4.1.2
139 */
140 public final static String P_RETURN_NULL_FOR_DEFAULT_NAMESPACE = "com.ctc.wstx.returnNullForDefaultNamespace";
141
142 // // // Enabling/disabling support for dtd++
143
144 /**
145 * Whether the Reader will recognized DTD++ extensions when parsing
146 * DTD subsets.
147 *<p>
148 * Note: not implemented by Woodstox.
149 *
150 * @deprecated Never implement, let's phase this out (deprecated in 4.2)
151 */
152 @Deprecated
153 public final static String P_SUPPORT_DTDPP = "com.ctc.wstx.supportDTDPP";
154
155 /**
156 * Whether the Reader will treat character references as entities while parsing
157 * XML documents.
158 */
159 public static final String P_TREAT_CHAR_REFS_AS_ENTS = "com.ctc.wstx.treatCharRefsAsEnts";
160
161 // // // Enabling alternate mode for parsing XML fragments instead
162 // // // of full documents
163
164 // Automatic W3C Schema support?
165 /*
166 * Whether W3C Schema hint attributes are recognized within document,
167 * and used to locate Schema to use for validation.
168 */
169 //public final static String P_AUTOMATIC_W3C_SCHEMA = 0x00100000;
170
171 /*
172 ///////////////////////////////////////////////////////////////////////
173 // More complex settings
174 ///////////////////////////////////////////////////////////////////////
175 */
176
177 // // // Buffer sizes;
178
179 /**
180 * Size of input buffer (in chars), to use for reading XML content
181 * from input stream/reader.
182 */
183 public final static String P_INPUT_BUFFER_LENGTH = "com.ctc.wstx.inputBufferLength";
184
185 // // // Constraints on sizes of text segments parsed:
186
187
188 /**
189 * Property to specify shortest non-complete text segment (part of
190 * CDATA section or text content) that parser is allowed to return,
191 * if not required to coalesce text.
192 */
193 public final static String P_MIN_TEXT_SEGMENT = "com.ctc.wstx.minTextSegment";
194
195 // // // Other size constraints (4.2+)
196
197 /**
198 * Maximum number of attributes allowed for single XML element.
199 * @since 4.2
200 */
201 public final static String P_MAX_ATTRIBUTES_PER_ELEMENT = "com.ctc.wstx.maxAttributesPerElement";
202
203 /**
204 * Maximum length of of individual attribute values (in characters)
205 * @since 4.2
206 */
207 public final static String P_MAX_ATTRIBUTE_SIZE = "com.ctc.wstx.maxAttributeSize";
208
209 /**
210 * Maximum number of child elements for any given element.
211 * @since 4.2
212 */
213 public final static String P_MAX_CHILDREN_PER_ELEMENT = "com.ctc.wstx.maxChildrenPerElement";
214
215 /**
216 * Maximum number of all elements in a single document.
217 * @since 4.2
218 */
219 public final static String P_MAX_ELEMENT_COUNT = "com.ctc.wstx.maxElementCount";
220
221 /**
222 * Maximum level of nesting of XML elements, starting with root element.
223 * @since 4.2
224 */
225 public final static String P_MAX_ELEMENT_DEPTH = "com.ctc.wstx.maxElementDepth";
226
227 /**
228 * Maximum length of input document, in characters.
229 * @since 4.2
230 */
231 public final static String P_MAX_CHARACTERS = "com.ctc.wstx.maxCharacters";
232
233 /**
234 * Maximum length of individual text (cdata) segments in input, in characters.
235 * @since 4.2
236 */
237 public final static String P_MAX_TEXT_LENGTH = "com.ctc.wstx.maxTextLength";
238
239 // and more size constraints (4.3+)
240
241 /**
242 * Maximum number of total (general parsed) entity expansions within input.
243 *
244 * @since 4.3
245 */
246 public final static String P_MAX_ENTITY_COUNT = "com.ctc.wstx.maxEntityCount";
247
248 /**
249 * Maximum depth of nested (general parsed) entity expansions.
250 *
251 * @since 4.3
252 */
253 public final static String P_MAX_ENTITY_DEPTH = "com.ctc.wstx.maxEntityDepth";
254
255 // // // Entity handling
256
257 /**
258 * Property of type {@link java.util.Map}, that defines explicit set of
259 * internal (generic) entities that will define of override any entities
260 * defined in internal or external subsets; except for the 5 pre-defined
261 * entities (lt, gt, amp, apos, quot). Can be used to explicitly define
262 * entities that would normally come from a DTD.
263 */
264 public final static String P_CUSTOM_INTERNAL_ENTITIES = "com.ctc.wstx.customInternalEntities";
265
266 /**
267 * Property of type {@link XMLResolver}, that
268 * will allow overriding of default DTD and external parameter entity
269 * resolution.
270 */
271 public final static String P_DTD_RESOLVER = "com.ctc.wstx.dtdResolver";
272
273 /**
274 * Property of type {@link XMLResolver}, that
275 * will allow overriding of default external general entity
276 * resolution. Note that using this property overrides settings done
277 * using {@link javax.xml.stream.XMLInputFactory#RESOLVER} (and vice versa).
278 */
279 public final static String P_ENTITY_RESOLVER = "com.ctc.wstx.entityResolver";
280
281 /**
282 * Property of type {@link XMLResolver}, that
283 * will allow graceful handling of references to undeclared (general)
284 * entities.
285 */
286 public final static String P_UNDECLARED_ENTITY_RESOLVER = "com.ctc.wstx.undeclaredEntityResolver";
287
288 /**
289 * Property of type {@link java.net.URL}, that will allow specifying
290 * context URL to use when resolving relative references, for the
291 * main-level entities (external DTD subset, references from the internal
292 * DTD subset).
293 */
294 public final static String P_BASE_URL = "com.ctc.wstx.baseURL";
295
296 // // // Alternate parsing modes
297
298 /**
299 * Three-valued property (one of
300 * {@link #PARSING_MODE_DOCUMENT},
301 * {@link #PARSING_MODE_FRAGMENT} or
302 * {@link #PARSING_MODE_DOCUMENTS}; default being the document mode)
303 * that can be used to handle "non-standard" XML content. The default
304 * mode (<code>PARSING_MODE_DOCUMENT</code>) allows parsing of only
305 * well-formed XML documents, but the other two modes allow more lenient
306 * parsing. Fragment mode allows parsing of XML content that does not
307 * have a single root element (can have zero or more), nor can have
308 * XML or DOCTYPE declarations: this may be useful if parsing a subset
309 * of a full XML document. Multi-document
310 * (<code>PARSING_MODE_DOCUMENTS</code>) mode on the other hand allows
311 * parsing of a stream that contains multiple consequtive well-formed
312 * documents, with possibly multiple XML and DOCTYPE declarations.
313 *<p>
314 * The main difference from the API perspective is that in first two
315 * modes, START_DOCUMENT and END_DOCUMENT are used as usual (as the first
316 * and last events returned), whereas the multi-document mode can return
317 * multiple pairs of these events: although it is still true that the
318 * first event (one cursor points to when reader is instantiated or
319 * returned by the event reader), there may be intervening pairs that
320 * signal boundary between two adjacent enclosed documents.
321 */
322 public final static String P_INPUT_PARSING_MODE = "com.ctc.wstx.fragmentMode";
323
324 // // // DTD defaulting, overriding
325
326 /*
327 ///////////////////////////////////////////////////////////////////////
328 // Helper classes, values enumerations
329 ///////////////////////////////////////////////////////////////////////
330 */
331
332 public final static ParsingMode PARSING_MODE_DOCUMENT = new ParsingMode();
333 public final static ParsingMode PARSING_MODE_FRAGMENT = new ParsingMode();
334 public final static ParsingMode PARSING_MODE_DOCUMENTS = new ParsingMode();
335
336 /**
337 * Inner class used for creating type-safe enumerations (prior to JDK 1.5).
338 */
339 public final static class ParsingMode
340 {
341 ParsingMode() { }
342 }
343 }
344