1 /*
2 * Copyright (C) 2010 Google Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package com.squareup.moshi;
17
18 import java.io.Closeable;
19 import java.io.IOException;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.List;
23 import java.util.Map;
24 import javax.annotation.CheckReturnValue;
25 import javax.annotation.Nullable;
26 import okio.Buffer;
27 import okio.BufferedSource;
28 import okio.ByteString;
29
30 /**
31 * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc7159.txt">RFC 7159</a>)
32 * encoded value as a stream of tokens. This stream includes both literal
33 * values (strings, numbers, booleans, and nulls) as well as the begin and
34 * end delimiters of objects and arrays. The tokens are traversed in
35 * depth-first order, the same order that they appear in the JSON document.
36 * Within JSON objects, tokens alternate between property names and their values.
37 *
38 * <h3>Parsing JSON</h3>
39 * To create a recursive descent parser for your own JSON streams, first create
40 * an entry point method that creates a {@code JsonReader}.
41 *
42 * <p>Next, create handler methods for each structure in your JSON text. You'll
43 * need a method for each object type and for each array type.
44 * <ul>
45 * <li>Within <strong>array handling</strong> methods, first call {@link
46 * #beginArray} to consume the array's opening bracket. Then create a
47 * while loop that accumulates values, terminating when {@link #hasNext}
48 * is false. Finally, read the array's closing bracket by calling {@link
49 * #endArray}.
50 * <li>Within <strong>object handling</strong> methods, first call {@link
51 * #beginObject} to consume the object's opening brace. Then create a
52 * while loop that assigns values to local variables based on their name.
53 * This loop should terminate when {@link #hasNext} is false. Finally,
54 * read the object's closing brace by calling {@link #endObject}.
55 * </ul>
56 * <p>When a nested object or array is encountered, delegate to the
57 * corresponding handler method.
58 *
59 * <p>When an unknown name is encountered, strict parsers should fail with an
60 * exception. Lenient parsers should call {@link #skipValue()} to recursively
61 * skip the value's nested tokens, which may otherwise conflict.
62 *
63 * <p>If a value may be null, you should first check using {@link #peek()}.
64 * Null literals can be consumed using either {@link #nextNull()} or {@link
65 * #skipValue()}.
66 *
67 * <h3>Example</h3>
68 * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
69 * [
70 * {
71 * "id": 912345678901,
72 * "text": "How do I read a JSON stream in Java?",
73 * "geo": null,
74 * "user": {
75 * "name": "json_newb",
76 * "followers_count": 41
77 * }
78 * },
79 * {
80 * "id": 912345678902,
81 * "text": "@json_newb just use JsonReader!",
82 * "geo": [50.454722, -104.606667],
83 * "user": {
84 * "name": "jesse",
85 * "followers_count": 2
86 * }
87 * }
88 * ]}</pre>
89 * This code implements the parser for the above structure: <pre> {@code
90 *
91 * public List<Message> readJsonStream(BufferedSource source) throws IOException {
92 * JsonReader reader = JsonReader.of(source);
93 * try {
94 * return readMessagesArray(reader);
95 * } finally {
96 * reader.close();
97 * }
98 * }
99 *
100 * public List<Message> readMessagesArray(JsonReader reader) throws IOException {
101 * List<Message> messages = new ArrayList<Message>();
102 *
103 * reader.beginArray();
104 * while (reader.hasNext()) {
105 * messages.add(readMessage(reader));
106 * }
107 * reader.endArray();
108 * return messages;
109 * }
110 *
111 * public Message readMessage(JsonReader reader) throws IOException {
112 * long id = -1;
113 * String text = null;
114 * User user = null;
115 * List<Double> geo = null;
116 *
117 * reader.beginObject();
118 * while (reader.hasNext()) {
119 * String name = reader.nextName();
120 * if (name.equals("id")) {
121 * id = reader.nextLong();
122 * } else if (name.equals("text")) {
123 * text = reader.nextString();
124 * } else if (name.equals("geo") && reader.peek() != JsonReader.Token.NULL) {
125 * geo = readDoublesArray(reader);
126 * } else if (name.equals("user")) {
127 * user = readUser(reader);
128 * } else {
129 * reader.skipValue();
130 * }
131 * }
132 * reader.endObject();
133 * return new Message(id, text, user, geo);
134 * }
135 *
136 * public List<Double> readDoublesArray(JsonReader reader) throws IOException {
137 * List<Double> doubles = new ArrayList<Double>();
138 *
139 * reader.beginArray();
140 * while (reader.hasNext()) {
141 * doubles.add(reader.nextDouble());
142 * }
143 * reader.endArray();
144 * return doubles;
145 * }
146 *
147 * public User readUser(JsonReader reader) throws IOException {
148 * String username = null;
149 * int followersCount = -1;
150 *
151 * reader.beginObject();
152 * while (reader.hasNext()) {
153 * String name = reader.nextName();
154 * if (name.equals("name")) {
155 * username = reader.nextString();
156 * } else if (name.equals("followers_count")) {
157 * followersCount = reader.nextInt();
158 * } else {
159 * reader.skipValue();
160 * }
161 * }
162 * reader.endObject();
163 * return new User(username, followersCount);
164 * }}</pre>
165 *
166 * <h3>Number Handling</h3>
167 * This reader permits numeric values to be read as strings and string values to
168 * be read as numbers. For example, both elements of the JSON array {@code
169 * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
170 * This behavior is intended to prevent lossy numeric conversions: double is
171 * JavaScript's only numeric type and very large values like {@code
172 * 9007199254740993} cannot be represented exactly on that platform. To minimize
173 * precision loss, extremely large values should be written and read as strings
174 * in JSON.
175 *
176 * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
177 * of this class are not thread safe.
178 */
179 public abstract class JsonReader implements Closeable {
180 // The nesting stack. Using a manual array rather than an ArrayList saves 20%. This stack will
181 // grow itself up to 256 levels of nesting including the top-level document. Deeper nesting is
182 // prone to trigger StackOverflowErrors.
183 int stackSize;
184 int[] scopes;
185 String[] pathNames;
186 int[] pathIndices;
187
188 /** True to accept non-spec compliant JSON. */
189 boolean lenient;
190
191 /** True to throw a {@link JsonDataException} on any attempt to call {@link #skipValue()}. */
192 boolean failOnUnknown;
193
194 /** Returns a new instance that reads UTF-8 encoded JSON from {@code source}. */
195 @CheckReturnValue public static JsonReader of(BufferedSource source) {
196 return new JsonUtf8Reader(source);
197 }
198
199 // Package-private to control subclasses.
200 JsonReader() {
201 scopes = new int[32];
202 pathNames = new String[32];
203 pathIndices = new int[32];
204 }
205
206 // Package-private to control subclasses.
207 JsonReader(JsonReader copyFrom) {
208 this.stackSize = copyFrom.stackSize;
209 this.scopes = copyFrom.scopes.clone();
210 this.pathNames = copyFrom.pathNames.clone();
211 this.pathIndices = copyFrom.pathIndices.clone();
212 this.lenient = copyFrom.lenient;
213 this.failOnUnknown = copyFrom.failOnUnknown;
214 }
215
216 final void pushScope(int newTop) {
217 if (stackSize == scopes.length) {
218 if (stackSize == 256) {
219 throw new JsonDataException("Nesting too deep at " + getPath());
220 }
221 scopes = Arrays.copyOf(scopes, scopes.length * 2);
222 pathNames = Arrays.copyOf(pathNames, pathNames.length * 2);
223 pathIndices = Arrays.copyOf(pathIndices, pathIndices.length * 2);
224 }
225 scopes[stackSize++] = newTop;
226 }
227
228 /**
229 * Throws a new IO exception with the given message and a context snippet
230 * with this reader's content.
231 */
232 final JsonEncodingException syntaxError(String message) throws JsonEncodingException {
233 throw new JsonEncodingException(message + " at path " + getPath());
234 }
235
236 final JsonDataException typeMismatch(@Nullable Object value, Object expected) {
237 if (value == null) {
238 return new JsonDataException(
239 "Expected " + expected + " but was null at path " + getPath());
240 } else {
241 return new JsonDataException("Expected " + expected + " but was " + value + ", a "
242 + value.getClass().getName() + ", at path " + getPath());
243 }
244 }
245
246 /**
247 * Configure this parser to be liberal in what it accepts. By default
248 * this parser is strict and only accepts JSON as specified by <a
249 * href="http://www.ietf.org/rfc/rfc7159.txt">RFC 7159</a>. Setting the
250 * parser to lenient causes it to ignore the following syntax errors:
251 *
252 * <ul>
253 * <li>Streams that include multiple top-level values. With strict parsing,
254 * each stream must contain exactly one top-level value.
255 * <li>Numbers may be {@linkplain Double#isNaN() NaNs} or {@link
256 * Double#isInfinite() infinities}.
257 * <li>End of line comments starting with {@code //} or {@code #} and
258 * ending with a newline character.
259 * <li>C-style comments starting with {@code /*} and ending with
260 * {@code *}{@code /}. Such comments may not be nested.
261 * <li>Names that are unquoted or {@code 'single quoted'}.
262 * <li>Strings that are unquoted or {@code 'single quoted'}.
263 * <li>Array elements separated by {@code ;} instead of {@code ,}.
264 * <li>Unnecessary array separators. These are interpreted as if null
265 * was the omitted value.
266 * <li>Names and values separated by {@code =} or {@code =>} instead of
267 * {@code :}.
268 * <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
269 * </ul>
270 */
271 public final void setLenient(boolean lenient) {
272 this.lenient = lenient;
273 }
274
275 /**
276 * Returns true if this parser is liberal in what it accepts.
277 */
278 @CheckReturnValue public final boolean isLenient() {
279 return lenient;
280 }
281
282 /**
283 * Configure whether this parser throws a {@link JsonDataException} when {@link #skipValue} is
284 * called. By default this parser permits values to be skipped.
285 *
286 * <p>Forbid skipping to prevent unrecognized values from being silently ignored. This option is
287 * useful in development and debugging because it means a typo like "locatiom" will be detected
288 * early. It's potentially harmful in production because it complicates revising a JSON schema.
289 */
290 public final void setFailOnUnknown(boolean failOnUnknown) {
291 this.failOnUnknown = failOnUnknown;
292 }
293
294 /**
295 * Returns true if this parser forbids skipping names and values.
296 */
297 @CheckReturnValue public final boolean failOnUnknown() {
298 return failOnUnknown;
299 }
300
301 /**
302 * Consumes the next token from the JSON stream and asserts that it is the beginning of a new
303 * array.
304 */
305 public abstract void beginArray() throws IOException;
306
307 /**
308 * Consumes the next token from the JSON stream and asserts that it is the
309 * end of the current array.
310 */
311 public abstract void endArray() throws IOException;
312
313 /**
314 * Consumes the next token from the JSON stream and asserts that it is the beginning of a new
315 * object.
316 */
317 public abstract void beginObject() throws IOException;
318
319 /**
320 * Consumes the next token from the JSON stream and asserts that it is the end of the current
321 * object.
322 */
323 public abstract void endObject() throws IOException;
324
325 /**
326 * Returns true if the current array or object has another element.
327 */
328 @CheckReturnValue public abstract boolean hasNext() throws IOException;
329
330 /**
331 * Returns the type of the next token without consuming it.
332 */
333 @CheckReturnValue public abstract Token peek() throws IOException;
334
335 /**
336 * Returns the next token, a {@linkplain Token#NAME property name}, and consumes it.
337 *
338 * @throws JsonDataException if the next token in the stream is not a property name.
339 */
340 @CheckReturnValue public abstract String nextName() throws IOException;
341
342 /**
343 * If the next token is a {@linkplain Token#NAME property name} that's in {@code options}, this
344 * consumes it and returns its index. Otherwise this returns -1 and no name is consumed.
345 */
346 @CheckReturnValue public abstract int selectName(Options options) throws IOException;
347
348 /**
349 * Skips the next token, consuming it. This method is intended for use when the JSON token stream
350 * contains unrecognized or unhandled names.
351 *
352 * <p>This throws a {@link JsonDataException} if this parser has been configured to {@linkplain
353 * #failOnUnknown fail on unknown} names.
354 */
355 public abstract void skipName() throws IOException;
356
357 /**
358 * Returns the {@linkplain Token#STRING string} value of the next token, consuming it. If the next
359 * token is a number, this method will return its string form.
360 *
361 * @throws JsonDataException if the next token is not a string or if this reader is closed.
362 */
363 public abstract String nextString() throws IOException;
364
365 /**
366 * If the next token is a {@linkplain Token#STRING string} that's in {@code options}, this
367 * consumes it and returns its index. Otherwise this returns -1 and no string is consumed.
368 */
369 @CheckReturnValue public abstract int selectString(Options options) throws IOException;
370
371 /**
372 * Returns the {@linkplain Token#BOOLEAN boolean} value of the next token, consuming it.
373 *
374 * @throws JsonDataException if the next token is not a boolean or if this reader is closed.
375 */
376 public abstract boolean nextBoolean() throws IOException;
377
378 /**
379 * Consumes the next token from the JSON stream and asserts that it is a literal null. Returns
380 * null.
381 *
382 * @throws JsonDataException if the next token is not null or if this reader is closed.
383 */
384 public abstract @Nullable <T> T nextNull() throws IOException;
385
386 /**
387 * Returns the {@linkplain Token#NUMBER double} value of the next token, consuming it. If the next
388 * token is a string, this method will attempt to parse it as a double using {@link
389 * Double#parseDouble(String)}.
390 *
391 * @throws JsonDataException if the next token is not a literal value, or if the next literal
392 * value cannot be parsed as a double, or is non-finite.
393 */
394 public abstract double nextDouble() throws IOException;
395
396 /**
397 * Returns the {@linkplain Token#NUMBER long} value of the next token, consuming it. If the next
398 * token is a string, this method will attempt to parse it as a long. If the next token's numeric
399 * value cannot be exactly represented by a Java {@code long}, this method throws.
400 *
401 * @throws JsonDataException if the next token is not a literal value, if the next literal value
402 * cannot be parsed as a number, or exactly represented as a long.
403 */
404 public abstract long nextLong() throws IOException;
405
406 /**
407 * Returns the {@linkplain Token#NUMBER int} value of the next token, consuming it. If the next
408 * token is a string, this method will attempt to parse it as an int. If the next token's numeric
409 * value cannot be exactly represented by a Java {@code int}, this method throws.
410 *
411 * @throws JsonDataException if the next token is not a literal value, if the next literal value
412 * cannot be parsed as a number, or exactly represented as an int.
413 */
414 public abstract int nextInt() throws IOException;
415
416 /**
417 * Skips the next value recursively. If it is an object or array, all nested elements are skipped.
418 * This method is intended for use when the JSON token stream contains unrecognized or unhandled
419 * values.
420 *
421 * <p>This throws a {@link JsonDataException} if this parser has been configured to {@linkplain
422 * #failOnUnknown fail on unknown} values.
423 */
424 public abstract void skipValue() throws IOException;
425
426 /**
427 * Returns the value of the next token, consuming it. The result may be a string, number, boolean,
428 * null, map, or list, according to the JSON structure.
429 *
430 * @throws JsonDataException if the next token is not a literal value, if a JSON object has a
431 * duplicate key.
432 */
433 public final @Nullable Object readJsonValue() throws IOException {
434 switch (peek()) {
435 case BEGIN_ARRAY:
436 List<Object> list = new ArrayList<>();
437 beginArray();
438 while (hasNext()) {
439 list.add(readJsonValue());
440 }
441 endArray();
442 return list;
443
444 case BEGIN_OBJECT:
445 Map<String, Object> map = new LinkedHashTreeMap<>();
446 beginObject();
447 while (hasNext()) {
448 String name = nextName();
449 Object value = readJsonValue();
450 Object replaced = map.put(name, value);
451 if (replaced != null) {
452 throw new JsonDataException("Map key '" + name + "' has multiple values at path "
453 + getPath() + ": " + replaced + " and " + value);
454 }
455 }
456 endObject();
457 return map;
458
459 case STRING:
460 return nextString();
461
462 case NUMBER:
463 return nextDouble();
464
465 case BOOLEAN:
466 return nextBoolean();
467
468 case NULL:
469 return nextNull();
470
471 default:
472 throw new IllegalStateException(
473 "Expected a value but was " + peek() + " at path " + getPath());
474 }
475 }
476
477 /**
478 * Returns a new {@code JsonReader} that can read data from this {@code JsonReader} without
479 * consuming it. The returned reader becomes invalid once this one is next read or closed.
480 *
481 * For example, we can use {@code peek()} to lookahead and read the same data multiple times.
482 *
483 * <pre> {@code
484 *
485 * Buffer buffer = new Buffer();
486 * buffer.writeUtf8("[123, 456, 789]")
487 *
488 * JsonReader jsonReader = JsonReader.of(buffer);
489 * jsonReader.beginArray();
490 * jsonReader.nextInt(); // Returns 123, reader contains 456, 789 and ].
491 *
492 * JsonReader peek = reader.peekReader();
493 * peek.nextInt() // Returns 456.
494 * peek.nextInt() // Returns 789.
495 * peek.endArray()
496 *
497 * jsonReader.nextInt() // Returns 456, reader contains 789 and ].
498 * }</pre>
499 */
500 @CheckReturnValue public abstract JsonReader peekJson();
501
502 /**
503 * Returns a <a href="http://goessner.net/articles/JsonPath/">JsonPath</a> to
504 * the current location in the JSON value.
505 */
506 @CheckReturnValue public final String getPath() {
507 return JsonScope.getPath(stackSize, scopes, pathNames, pathIndices);
508 }
509
510 /**
511 * Changes the reader to treat the next name as a string value. This is useful for map adapters so
512 * that arbitrary type adapters can use {@link #nextString} to read a name value.
513 */
514 abstract void promoteNameToValue() throws IOException;
515
516 /**
517 * A set of strings to be chosen with {@link #selectName} or {@link #selectString}. This prepares
518 * the encoded values of the strings so they can be read directly from the input source.
519 */
520 public static final class Options {
521 final String[] strings;
522 final okio.Options doubleQuoteSuffix;
523
524 private Options(String[] strings, okio.Options doubleQuoteSuffix) {
525 this.strings = strings;
526 this.doubleQuoteSuffix = doubleQuoteSuffix;
527 }
528
529 @CheckReturnValue public static Options of(String... strings) {
530 try {
531 ByteString[] result = new ByteString[strings.length];
532 Buffer buffer = new Buffer();
533 for (int i = 0; i < strings.length; i++) {
534 JsonUtf8Writer.string(buffer, strings[i]);
535 buffer.readByte(); // Skip the leading double quote (but leave the trailing one).
536 result[i] = buffer.readByteString();
537 }
538 return new Options(strings.clone(), okio.Options.of(result));
539 } catch (IOException e) {
540 throw new AssertionError(e);
541 }
542 }
543 }
544
545 /**
546 * A structure, name, or value type in a JSON-encoded string.
547 */
548 public enum Token {
549
550 /**
551 * The opening of a JSON array. Written using {@link JsonWriter#beginArray}
552 * and read using {@link JsonReader#beginArray}.
553 */
554 BEGIN_ARRAY,
555
556 /**
557 * The closing of a JSON array. Written using {@link JsonWriter#endArray}
558 * and read using {@link JsonReader#endArray}.
559 */
560 END_ARRAY,
561
562 /**
563 * The opening of a JSON object. Written using {@link JsonWriter#beginObject}
564 * and read using {@link JsonReader#beginObject}.
565 */
566 BEGIN_OBJECT,
567
568 /**
569 * The closing of a JSON object. Written using {@link JsonWriter#endObject}
570 * and read using {@link JsonReader#endObject}.
571 */
572 END_OBJECT,
573
574 /**
575 * A JSON property name. Within objects, tokens alternate between names and
576 * their values. Written using {@link JsonWriter#name} and read using {@link
577 * JsonReader#nextName}
578 */
579 NAME,
580
581 /**
582 * A JSON string.
583 */
584 STRING,
585
586 /**
587 * A JSON number represented in this API by a Java {@code double}, {@code
588 * long}, or {@code int}.
589 */
590 NUMBER,
591
592 /**
593 * A JSON {@code true} or {@code false}.
594 */
595 BOOLEAN,
596
597 /**
598 * A JSON {@code null}.
599 */
600 NULL,
601
602 /**
603 * The end of the JSON stream. This sentinel value is returned by {@link
604 * JsonReader#peek()} to signal that the JSON-encoded value has no more
605 * tokens.
606 */
607 END_DOCUMENT
608 }
609 }
610