1 package com.fasterxml.jackson.core.io;
2
3 import java.util.Arrays;
4
5 import com.fasterxml.jackson.core.util.ByteArrayBuilder;
6 import com.fasterxml.jackson.core.util.TextBuffer;
7
8 /**
9  * Helper class used for efficient encoding of JSON String values (including
10  * JSON field names) into Strings or UTF-8 byte arrays.
11  *<p>
12  * Note that methods in here are somewhat optimized, but not ridiculously so.
13  * Reason is that conversion method results are expected to be cached so that
14  * these methods will not be hot spots during normal operation.
15  */

16 public final class JsonStringEncoder
17 {
18     /*
19     /**********************************************************************
20     /* Constants
21     /**********************************************************************
22      */

23
24     private final static char[] HC = CharTypes.copyHexChars();
25
26     private final static byte[] HB = CharTypes.copyHexBytes();
27
28     private final static int SURR1_FIRST = 0xD800;
29     private final static int SURR1_LAST = 0xDBFF;
30     private final static int SURR2_FIRST = 0xDC00;
31     private final static int SURR2_LAST = 0xDFFF;
32
33     private final static int INITIAL_CHAR_BUFFER_SIZE = 120;
34     private final static int INITIAL_BYTE_BUFFER_SIZE = 200;
35
36     /*
37     /**********************************************************************
38     /* Construction, instance access
39     /**********************************************************************
40      */

41
42     // Since 2.10 we have stateless singleton and NO fancy ThreadLocal/SofRef caching!!!
43     private final static JsonStringEncoder instance = new JsonStringEncoder();
44     
45     public JsonStringEncoder() { }
46
47     /**
48      * Factory method for getting an instance; this is either recycled per-thread instance,
49      * or a newly constructed one.
50      */

51     public static JsonStringEncoder getInstance() {
52         return instance;
53     }
54
55     /*
56     /**********************************************************************
57     /* Public API
58     /**********************************************************************
59      */

60
61     /**
62      * Method that will quote text contents using JSON standard quoting,
63      * and return results as a character array
64      */

65     public char[] quoteAsString(String input)
66     {
67         char[] outputBuffer = new char[INITIAL_CHAR_BUFFER_SIZE];
68         final int[] escCodes = CharTypes.get7BitOutputEscapes();
69         final int escCodeCount = escCodes.length;
70         int inPtr = 0;
71         final int inputLen = input.length();
72         TextBuffer textBuffer = null;
73         int outPtr = 0;
74         char[] qbuf = null;
75
76         outer:
77         while (inPtr < inputLen) {
78             tight_loop:
79             while (true) {
80                 char c = input.charAt(inPtr);
81                 if (c < escCodeCount && escCodes[c] != 0) {
82                     break tight_loop;
83                 }
84                 if (outPtr >= outputBuffer.length) {
85                     if (textBuffer == null) {
86                         textBuffer = TextBuffer.fromInitial(outputBuffer);
87                     }
88                     outputBuffer = textBuffer.finishCurrentSegment();
89                     outPtr = 0;
90                 }
91                 outputBuffer[outPtr++] = c;
92                 if (++inPtr >= inputLen) {
93                     break outer;
94                 }
95             }
96             // something to escape; 2 or 6-char variant? 
97             if (qbuf == null) {
98                 qbuf = _qbuf();
99             }
100             char d = input.charAt(inPtr++);
101             int escCode = escCodes[d];
102             int length = (escCode < 0)
103                     ? _appendNumeric(d, qbuf)
104                     : _appendNamed(escCode, qbuf);
105                     ;
106             if ((outPtr + length) > outputBuffer.length) {
107                 int first = outputBuffer.length - outPtr;
108                 if (first > 0) {
109                     System.arraycopy(qbuf, 0, outputBuffer, outPtr, first);
110                 }
111                 if (textBuffer == null) {
112                     textBuffer = TextBuffer.fromInitial(outputBuffer);
113                 }
114                 outputBuffer = textBuffer.finishCurrentSegment();
115                 int second = length - first;
116                 System.arraycopy(qbuf, first, outputBuffer, 0, second);
117                 outPtr = second;
118             } else {
119                 System.arraycopy(qbuf, 0, outputBuffer, outPtr, length);
120                 outPtr += length;
121             }
122         }
123
124         if (textBuffer == null) {
125             return Arrays.copyOfRange(outputBuffer, 0, outPtr);
126         }
127         textBuffer.setCurrentLength(outPtr);
128         return textBuffer.contentsAsArray();
129     }
130
131     /**
132      * Overloaded variant of {@link #quoteAsString(String)}.
133      *
134      * @since 2.10
135      */

136     public char[] quoteAsString(CharSequence input)
137     {
138         // 15-Aug-2019, tatu: Optimize common case as JIT can't get rid of overhead otherwise
139         if (input instanceof String) {
140             return quoteAsString((String) input);
141         }
142
143         TextBuffer textBuffer = null;
144
145         char[] outputBuffer = new char[INITIAL_CHAR_BUFFER_SIZE];
146         final int[] escCodes = CharTypes.get7BitOutputEscapes();
147         final int escCodeCount = escCodes.length;
148         int inPtr = 0;
149         final int inputLen = input.length();
150         int outPtr = 0;
151         char[] qbuf = null;
152  
153         outer:
154         while (inPtr < inputLen) {
155             tight_loop:
156             while (true) {
157                 char c = input.charAt(inPtr);
158                 if (c < escCodeCount && escCodes[c] != 0) {
159                     break tight_loop;
160                 }
161                 if (outPtr >= outputBuffer.length) {
162                     if (textBuffer == null) {
163                         textBuffer = TextBuffer.fromInitial(outputBuffer);
164                     }
165                     outputBuffer = textBuffer.finishCurrentSegment();
166                     outPtr = 0;
167                 }
168                 outputBuffer[outPtr++] = c;
169                 if (++inPtr >= inputLen) {
170                     break outer;
171                 }
172             }
173             // something to escape; 2 or 6-char variant? 
174             if (qbuf == null) {
175                 qbuf = _qbuf();
176             }
177             char d = input.charAt(inPtr++);
178             int escCode = escCodes[d];
179             int length = (escCode < 0)
180                     ? _appendNumeric(d, qbuf)
181                     : _appendNamed(escCode, qbuf);
182                     ;
183             if ((outPtr + length) > outputBuffer.length) {
184                 int first = outputBuffer.length - outPtr;
185                 if (first > 0) {
186                     System.arraycopy(qbuf, 0, outputBuffer, outPtr, first);
187                 }
188                 if (textBuffer == null) {
189                     textBuffer = TextBuffer.fromInitial(outputBuffer);
190                 }
191                 outputBuffer = textBuffer.finishCurrentSegment();
192                 int second = length - first;
193                 System.arraycopy(qbuf, first, outputBuffer, 0, second);
194                 outPtr = second;
195             } else {
196                 System.arraycopy(qbuf, 0, outputBuffer, outPtr, length);
197                 outPtr += length;
198             }
199         }
200
201         if (textBuffer == null) {
202             return Arrays.copyOfRange(outputBuffer, 0, outPtr);
203         }
204         textBuffer.setCurrentLength(outPtr);
205         return textBuffer.contentsAsArray();
206     }
207
208     /**
209      * Method that will quote text contents using JSON standard quoting,
210      * and append results to a supplied {@link StringBuilder}.
211      * Use this variant if you have e.g. a {@link StringBuilder} and want to avoid superfluous copying of it.
212      *
213      * @since 2.8
214      */

215     public void quoteAsString(CharSequence input, StringBuilder output)
216     {
217         final int[] escCodes = CharTypes.get7BitOutputEscapes();
218         final int escCodeCount = escCodes.length;
219         int inPtr = 0;
220         final int inputLen = input.length();
221         char[] qbuf = null;
222
223         outer:
224         while (inPtr < inputLen) {
225             tight_loop:
226             while (true) {
227                 char c = input.charAt(inPtr);
228                 if (c < escCodeCount && escCodes[c] != 0) {
229                     break tight_loop;
230                 }
231                 output.append(c);
232                 if (++inPtr >= inputLen) {
233                     break outer;
234                 }
235             }
236             // something to escape; 2 or 6-char variant?
237             if (qbuf == null) {
238                 qbuf = _qbuf();
239             }
240             char d = input.charAt(inPtr++);
241             int escCode = escCodes[d];
242             int length = (escCode < 0)
243                     ? _appendNumeric(d, qbuf)
244                     : _appendNamed(escCode, qbuf);
245             output.append(qbuf, 0, length);
246         }
247     }
248
249     /**
250      * Will quote given JSON String value using standard quoting, encode
251      * results as UTF-8, and return result as a byte array.
252      */

253     @SuppressWarnings("resource")
254     public byte[] quoteAsUTF8(String text)
255     {
256         int inputPtr = 0;
257         int inputEnd = text.length();
258         int outputPtr = 0;
259         byte[] outputBuffer = new byte[INITIAL_BYTE_BUFFER_SIZE];
260         ByteArrayBuilder bb = null;
261         
262         main:
263         while (inputPtr < inputEnd) {
264             final int[] escCodes = CharTypes.get7BitOutputEscapes();
265
266             inner_loop: // ASCII and escapes
267             while (true) {
268                 int ch = text.charAt(inputPtr);
269                 if (ch > 0x7F || escCodes[ch] != 0) {
270                     break inner_loop;
271                 }
272                 if (outputPtr >= outputBuffer.length) {
273                     if (bb == null) {
274                         bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
275                     }
276                     outputBuffer = bb.finishCurrentSegment();
277                     outputPtr = 0;
278                 }
279                 outputBuffer[outputPtr++] = (byte) ch;
280                 if (++inputPtr >= inputEnd) {
281                     break main;
282                 }
283             }
284             if (bb == null) {
285                 bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
286             }
287             if (outputPtr >= outputBuffer.length) {
288                 outputBuffer = bb.finishCurrentSegment();
289                 outputPtr = 0;
290             }
291             // Ok, so what did we hit?
292             int ch = (int) text.charAt(inputPtr++);
293             if (ch <= 0x7F) { // needs quoting
294                 int escape = escCodes[ch];
295                 // ctrl-char, 6-byte escape...
296                 outputPtr = _appendByte(ch, escape, bb, outputPtr);
297                 outputBuffer = bb.getCurrentSegment();
298                 continue main;
299             }
300             if (ch <= 0x7FF) { // fine, just needs 2 byte output
301                 outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
302                 ch = (0x80 | (ch & 0x3f));
303             } else { // 3 or 4 bytes
304                 // Surrogates?
305                 if (ch < SURR1_FIRST || ch > SURR2_LAST) { // nope
306                     outputBuffer[outputPtr++] = (byte) (0xe0 | (ch >> 12));
307                     if (outputPtr >= outputBuffer.length) {
308                         outputBuffer = bb.finishCurrentSegment();
309                         outputPtr = 0;
310                     }
311                     outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
312                     ch = (0x80 | (ch & 0x3f));
313                 } else { // yes, surrogate pair
314                     if (ch > SURR1_LAST) { // must be from first range
315                         _illegal(ch);
316                     }
317                     // and if so, followed by another from next range
318                     if (inputPtr >= inputEnd) {
319                         _illegal(ch);
320                     }
321                     ch = _convert(ch, text.charAt(inputPtr++));
322                     if (ch > 0x10FFFF) { // illegal, as per RFC 4627
323                         _illegal(ch);
324                     }
325                     outputBuffer[outputPtr++] = (byte) (0xf0 | (ch >> 18));
326                     if (outputPtr >= outputBuffer.length) {
327                         outputBuffer = bb.finishCurrentSegment();
328                         outputPtr = 0;
329                     }
330                     outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 12) & 0x3f));
331                     if (outputPtr >= outputBuffer.length) {
332                         outputBuffer = bb.finishCurrentSegment();
333                         outputPtr = 0;
334                     }
335                     outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f));
336                     ch = (0x80 | (ch & 0x3f));
337                 }
338             }
339             if (outputPtr >= outputBuffer.length) {
340                 outputBuffer = bb.finishCurrentSegment();
341                 outputPtr = 0;
342             }
343             outputBuffer[outputPtr++] = (byte) ch;
344         }
345         if (bb == null) {
346             return Arrays.copyOfRange(outputBuffer, 0, outputPtr);
347         }
348         return bb.completeAndCoalesce(outputPtr);
349     }
350
351     /**
352      * Will encode given String as UTF-8 (without any quoting), return
353      * resulting byte array.
354      */

355     @SuppressWarnings("resource")
356     public byte[] encodeAsUTF8(String text)
357     {
358         int inputPtr = 0;
359         int inputEnd = text.length();
360         int outputPtr = 0;
361         byte[] outputBuffer = new byte[INITIAL_BYTE_BUFFER_SIZE];
362         int outputEnd = outputBuffer.length;
363         ByteArrayBuilder bb = null;
364
365         main_loop:
366         while (inputPtr < inputEnd) {
367             int c = text.charAt(inputPtr++);
368
369             // first tight loop for ascii
370             while (c <= 0x7F) {
371                 if (outputPtr >= outputEnd) {
372                     if (bb == null) {
373                         bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
374                     }
375                     outputBuffer = bb.finishCurrentSegment();
376                     outputEnd = outputBuffer.length;
377                     outputPtr = 0;
378                 }
379                 outputBuffer[outputPtr++] = (byte) c;
380                 if (inputPtr >= inputEnd) {
381                     break main_loop;
382                 }
383                 c = text.charAt(inputPtr++);
384             }
385
386             // then multi-byte...
387             if (bb == null) {
388                 bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
389             }
390             if (outputPtr >= outputEnd) {
391                 outputBuffer = bb.finishCurrentSegment();
392                 outputEnd = outputBuffer.length;
393                 outputPtr = 0;
394             }
395             if (c < 0x800) { // 2-byte
396                 outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6));
397             } else { // 3 or 4 bytes
398                 // Surrogates?
399                 if (c < SURR1_FIRST || c > SURR2_LAST) { // nope
400                     outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12));
401                     if (outputPtr >= outputEnd) {
402                         outputBuffer = bb.finishCurrentSegment();
403                         outputEnd = outputBuffer.length;
404                         outputPtr = 0;
405                     }
406                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
407                 } else { // yes, surrogate pair
408                     if (c > SURR1_LAST) { // must be from first range
409                         _illegal(c);
410                     }
411                     // and if so, followed by another from next range
412                     if (inputPtr >= inputEnd) {
413                         _illegal(c);
414                     }
415                     c = _convert(c, text.charAt(inputPtr++));
416                     if (c > 0x10FFFF) { // illegal, as per RFC 4627
417                         _illegal(c);
418                     }
419                     outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18));
420                     if (outputPtr >= outputEnd) {
421                         outputBuffer = bb.finishCurrentSegment();
422                         outputEnd = outputBuffer.length;
423                         outputPtr = 0;
424                     }
425                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
426                     if (outputPtr >= outputEnd) {
427                         outputBuffer = bb.finishCurrentSegment();
428                         outputEnd = outputBuffer.length;
429                         outputPtr = 0;
430                     }
431                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
432                 }
433             }
434             if (outputPtr >= outputEnd) {
435                 outputBuffer = bb.finishCurrentSegment();
436                 outputEnd = outputBuffer.length;
437                 outputPtr = 0;
438             }
439             outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f));
440         }
441         if (bb == null) {
442             return Arrays.copyOfRange(outputBuffer, 0, outputPtr);
443         }
444         return bb.completeAndCoalesce(outputPtr);
445     }
446
447     /**
448      * Overloaded variant of {@link #encodeAsUTF8(String)}.
449      *
450      * @since 2.11
451      */

452     @SuppressWarnings("resource")
453     public byte[] encodeAsUTF8(CharSequence text)
454     {
455         int inputPtr = 0;
456         int inputEnd = text.length();
457         int outputPtr = 0;
458         byte[] outputBuffer = new byte[INITIAL_BYTE_BUFFER_SIZE];
459         int outputEnd = outputBuffer.length;
460         ByteArrayBuilder bb = null;
461
462         main_loop:
463         while (inputPtr < inputEnd) {
464             int c = text.charAt(inputPtr++);
465
466             // first tight loop for ascii
467             while (c <= 0x7F) {
468                 if (outputPtr >= outputEnd) {
469                     if (bb == null) {
470                         bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
471                     }
472                     outputBuffer = bb.finishCurrentSegment();
473                     outputEnd = outputBuffer.length;
474                     outputPtr = 0;
475                 }
476                 outputBuffer[outputPtr++] = (byte) c;
477                 if (inputPtr >= inputEnd) {
478                     break main_loop;
479                 }
480                 c = text.charAt(inputPtr++);
481             }
482
483             // then multi-byte...
484             if (bb == null) {
485                 bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr);
486             }
487             if (outputPtr >= outputEnd) {
488                 outputBuffer = bb.finishCurrentSegment();
489                 outputEnd = outputBuffer.length;
490                 outputPtr = 0;
491             }
492             if (c < 0x800) { // 2-byte
493                 outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6));
494             } else { // 3 or 4 bytes
495                 // Surrogates?
496                 if (c < SURR1_FIRST || c > SURR2_LAST) { // nope
497                     outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12));
498                     if (outputPtr >= outputEnd) {
499                         outputBuffer = bb.finishCurrentSegment();
500                         outputEnd = outputBuffer.length;
501                         outputPtr = 0;
502                     }
503                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
504                 } else { // yes, surrogate pair
505                     if (c > SURR1_LAST) { // must be from first range
506                         _illegal(c);
507                     }
508                     // and if so, followed by another from next range
509                     if (inputPtr >= inputEnd) {
510                         _illegal(c);
511                     }
512                     c = _convert(c, text.charAt(inputPtr++));
513                     if (c > 0x10FFFF) { // illegal, as per RFC 4627
514                         _illegal(c);
515                     }
516                     outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18));
517                     if (outputPtr >= outputEnd) {
518                         outputBuffer = bb.finishCurrentSegment();
519                         outputEnd = outputBuffer.length;
520                         outputPtr = 0;
521                     }
522                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
523                     if (outputPtr >= outputEnd) {
524                         outputBuffer = bb.finishCurrentSegment();
525                         outputEnd = outputBuffer.length;
526                         outputPtr = 0;
527                     }
528                     outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
529                 }
530             }
531             if (outputPtr >= outputEnd) {
532                 outputBuffer = bb.finishCurrentSegment();
533                 outputEnd = outputBuffer.length;
534                 outputPtr = 0;
535             }
536             outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f));
537         }
538         if (bb == null) {
539             return Arrays.copyOfRange(outputBuffer, 0, outputPtr);
540         }
541         return bb.completeAndCoalesce(outputPtr);
542     }
543
544     /*
545     /**********************************************************************
546     /* Internal methods
547     /**********************************************************************
548      */

549
550     private char[] _qbuf() {
551         char[] qbuf = new char[6];
552         qbuf[0] = '\\';
553         qbuf[2] = '0';
554         qbuf[3] = '0';
555         return qbuf;
556     }
557
558     private int _appendNumeric(int value, char[] qbuf) {
559         qbuf[1] = 'u';
560         // We know it's a control char, so only the last 2 chars are non-0
561         qbuf[4] = HC[value >> 4];
562         qbuf[5] = HC[value & 0xF];
563         return 6;
564     }
565
566     private int _appendNamed(int esc, char[] qbuf) {
567         qbuf[1] = (char) esc;
568         return 2;
569     }
570
571     private int _appendByte(int ch, int esc, ByteArrayBuilder bb, int ptr)
572     {
573         bb.setCurrentSegmentLength(ptr);
574         bb.append('\\');
575         if (esc < 0) { // standard escape
576             bb.append('u');
577             if (ch > 0xFF) {
578                 int hi = (ch >> 8);
579                 bb.append(HB[hi >> 4]);
580                 bb.append(HB[hi & 0xF]);
581                 ch &= 0xFF;
582             } else {
583                 bb.append('0');
584                 bb.append('0');
585             }
586             bb.append(HB[ch >> 4]);
587             bb.append(HB[ch & 0xF]);
588         } else { // 2-char simple escape
589             bb.append((byte) esc);
590         }
591         return bb.getCurrentSegmentLength();
592     }
593
594     private static int _convert(int p1, int p2) {
595         // Ok, then, is the second part valid?
596         if (p2 < SURR2_FIRST || p2 > SURR2_LAST) {
597             throw new IllegalArgumentException("Broken surrogate pair: first char 0x"+Integer.toHexString(p1)+", second 0x"+Integer.toHexString(p2)+"; illegal combination");
598         }
599         return 0x10000 + ((p1 - SURR1_FIRST) << 10) + (p2 - SURR2_FIRST);
600     }
601
602     private static void _illegal(int c) {
603         throw new IllegalArgumentException(UTF8Writer.illegalSurrogateDesc(c));
604     }
605 }
606