1 /* Jackson JSON-processor.
2 *
3 * Copyright (c) 2007- Tatu Saloranta, tatu.saloranta@iki.fi
4 */
5 package com.fasterxml.jackson.core;
6
7 import java.util.Arrays;
8
9 import com.fasterxml.jackson.core.util.ByteArrayBuilder;
10
11 /**
12 * Class used to define specific details of which
13 * variant of Base64 encoding/decoding is to be used. Although there is
14 * somewhat standard basic version (so-called "MIME Base64"), other variants
15 * exists, see <a href="http://en.wikipedia.org/wiki/Base64">Base64 Wikipedia entry</a> for details.
16 *
17 * @author Tatu Saloranta
18 */
19 public final class Base64Variant
20 implements java.io.Serializable
21 {
22 private final static int INT_SPACE = 0x20;
23
24 // We'll only serialize name
25 private static final long serialVersionUID = 1L;
26
27 /**
28 * Placeholder used by "no padding" variant, to be used when a character
29 * value is needed.
30 */
31 final static char PADDING_CHAR_NONE = '\0';
32
33 /**
34 * Marker used to denote ascii characters that do not correspond
35 * to a 6-bit value (in this variant), and is not used as a padding
36 * character.
37 */
38 public final static int BASE64_VALUE_INVALID = -1;
39
40 /**
41 * Marker used to denote ascii character (in decoding table) that
42 * is the padding character using this variant (if any).
43 */
44 public final static int BASE64_VALUE_PADDING = -2;
45
46 /*
47 /**********************************************************
48 /* Encoding/decoding tables
49 /**********************************************************
50 */
51
52 /**
53 * Decoding table used for base 64 decoding.
54 */
55 private final transient int[] _asciiToBase64 = new int[128];
56
57 /**
58 * Encoding table used for base 64 decoding when output is done
59 * as characters.
60 */
61 private final transient char[] _base64ToAsciiC = new char[64];
62
63 /**
64 * Alternative encoding table used for base 64 decoding when output is done
65 * as ascii bytes.
66 */
67 private final transient byte[] _base64ToAsciiB = new byte[64];
68
69 /*
70 /**********************************************************
71 /* Other configuration
72 /**********************************************************
73 */
74
75 /**
76 * Symbolic name of variant; used for diagnostics/debugging.
77 *<p>
78 * Note that this is the only non-transient field; used when reading
79 * back from serialized state.
80 *<p>
81 * Also: must not be private, accessed from `BaseVariants`
82 */
83 final String _name;
84
85 /**
86 * Whether this variant uses padding or not.
87 */
88 private final transient boolean _usesPadding;
89
90 /**
91 * Character used for padding, if any ({@link #PADDING_CHAR_NONE} if not).
92 */
93 private final transient char _paddingChar;
94
95 /**
96 * Maximum number of encoded base64 characters to output during encoding
97 * before adding a linefeed, if line length is to be limited
98 * ({@link java.lang.Integer#MAX_VALUE} if not limited).
99 *<p>
100 * Note: for some output modes (when writing attributes) linefeeds may
101 * need to be avoided, and this value ignored.
102 */
103 private final transient int _maxLineLength;
104
105 /*
106 /**********************************************************
107 /* Life-cycle
108 /**********************************************************
109 */
110
111 public Base64Variant(String name, String base64Alphabet, boolean usesPadding, char paddingChar, int maxLineLength)
112 {
113 _name = name;
114 _usesPadding = usesPadding;
115 _paddingChar = paddingChar;
116 _maxLineLength = maxLineLength;
117
118 // Ok and then we need to create codec tables.
119
120 // First the main encoding table:
121 int alphaLen = base64Alphabet.length();
122 if (alphaLen != 64) {
123 throw new IllegalArgumentException("Base64Alphabet length must be exactly 64 (was "+alphaLen+")");
124 }
125
126 // And then secondary encoding table and decoding table:
127 base64Alphabet.getChars(0, alphaLen, _base64ToAsciiC, 0);
128 Arrays.fill(_asciiToBase64, BASE64_VALUE_INVALID);
129 for (int i = 0; i < alphaLen; ++i) {
130 char alpha = _base64ToAsciiC[i];
131 _base64ToAsciiB[i] = (byte) alpha;
132 _asciiToBase64[alpha] = i;
133 }
134
135 // Plus if we use padding, add that in too
136 if (usesPadding) {
137 _asciiToBase64[(int) paddingChar] = BASE64_VALUE_PADDING;
138 }
139 }
140
141 /**
142 * "Copy constructor" that can be used when the base alphabet is identical
143 * to one used by another variant except for the maximum line length
144 * (and obviously, name).
145 */
146 public Base64Variant(Base64Variant base, String name, int maxLineLength)
147 {
148 this(base, name, base._usesPadding, base._paddingChar, maxLineLength);
149 }
150
151 /**
152 * "Copy constructor" that can be used when the base alphabet is identical
153 * to one used by another variant, but other details (padding, maximum
154 * line length) differ
155 */
156 public Base64Variant(Base64Variant base, String name, boolean usesPadding, char paddingChar, int maxLineLength)
157 {
158 _name = name;
159 byte[] srcB = base._base64ToAsciiB;
160 System.arraycopy(srcB, 0, this._base64ToAsciiB, 0, srcB.length);
161 char[] srcC = base._base64ToAsciiC;
162 System.arraycopy(srcC, 0, this._base64ToAsciiC, 0, srcC.length);
163 int[] srcV = base._asciiToBase64;
164 System.arraycopy(srcV, 0, this._asciiToBase64, 0, srcV.length);
165
166 _usesPadding = usesPadding;
167 _paddingChar = paddingChar;
168 _maxLineLength = maxLineLength;
169 }
170
171 /*
172 /**********************************************************
173 /* Serializable overrides
174 /**********************************************************
175 */
176
177 /**
178 * Method used to "demote" deserialized instances back to
179 * canonical ones
180 */
181 protected Object readResolve() {
182 return Base64Variants.valueOf(_name);
183 }
184
185 /*
186 /**********************************************************
187 /* Public accessors
188 /**********************************************************
189 */
190
191 public String getName() { return _name; }
192
193 public boolean usesPadding() { return _usesPadding; }
194 public boolean usesPaddingChar(char c) { return c == _paddingChar; }
195 public boolean usesPaddingChar(int ch) { return ch == (int) _paddingChar; }
196 public char getPaddingChar() { return _paddingChar; }
197 public byte getPaddingByte() { return (byte)_paddingChar; }
198
199 public int getMaxLineLength() { return _maxLineLength; }
200
201 /*
202 /**********************************************************
203 /* Decoding support
204 /**********************************************************
205 */
206
207 /**
208 * @return 6-bit decoded value, if valid character;
209 */
210 public int decodeBase64Char(char c)
211 {
212 int ch = (int) c;
213 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
214 }
215
216 public int decodeBase64Char(int ch)
217 {
218 return (ch <= 127) ? _asciiToBase64[ch] : BASE64_VALUE_INVALID;
219 }
220
221 public int decodeBase64Byte(byte b)
222 {
223 int ch = (int) b;
224 // note: cast retains sign, so it's from -128 to +127
225 if (ch < 0) {
226 return BASE64_VALUE_INVALID;
227 }
228 return _asciiToBase64[ch];
229 }
230
231 /*
232 /**********************************************************
233 /* Encoding support
234 /**********************************************************
235 */
236
237 public char encodeBase64BitsAsChar(int value)
238 {
239 /* Let's assume caller has done necessary checks; this
240 * method must be fast and inlinable
241 */
242 return _base64ToAsciiC[value];
243 }
244
245 /**
246 * Method that encodes given right-aligned (LSB) 24-bit value
247 * into 4 base64 characters, stored in given result buffer.
248 */
249 public int encodeBase64Chunk(int b24, char[] buffer, int ptr)
250 {
251 buffer[ptr++] = _base64ToAsciiC[(b24 >> 18) & 0x3F];
252 buffer[ptr++] = _base64ToAsciiC[(b24 >> 12) & 0x3F];
253 buffer[ptr++] = _base64ToAsciiC[(b24 >> 6) & 0x3F];
254 buffer[ptr++] = _base64ToAsciiC[b24 & 0x3F];
255 return ptr;
256 }
257
258 public void encodeBase64Chunk(StringBuilder sb, int b24)
259 {
260 sb.append(_base64ToAsciiC[(b24 >> 18) & 0x3F]);
261 sb.append(_base64ToAsciiC[(b24 >> 12) & 0x3F]);
262 sb.append(_base64ToAsciiC[(b24 >> 6) & 0x3F]);
263 sb.append(_base64ToAsciiC[b24 & 0x3F]);
264 }
265
266 /**
267 * Method that outputs partial chunk (which only encodes one
268 * or two bytes of data). Data given is still aligned same as if
269 * it as full data; that is, missing data is at the "right end"
270 * (LSB) of int.
271 *
272 * @param outputBytes Number of encoded bytes included (either 1 or 2)
273 */
274 public int encodeBase64Partial(int bits, int outputBytes, char[] buffer, int outPtr)
275 {
276 buffer[outPtr++] = _base64ToAsciiC[(bits >> 18) & 0x3F];
277 buffer[outPtr++] = _base64ToAsciiC[(bits >> 12) & 0x3F];
278 if (_usesPadding) {
279 buffer[outPtr++] = (outputBytes == 2) ?
280 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar;
281 buffer[outPtr++] = _paddingChar;
282 } else {
283 if (outputBytes == 2) {
284 buffer[outPtr++] = _base64ToAsciiC[(bits >> 6) & 0x3F];
285 }
286 }
287 return outPtr;
288 }
289
290 public void encodeBase64Partial(StringBuilder sb, int bits, int outputBytes)
291 {
292 sb.append(_base64ToAsciiC[(bits >> 18) & 0x3F]);
293 sb.append(_base64ToAsciiC[(bits >> 12) & 0x3F]);
294 if (_usesPadding) {
295 sb.append((outputBytes == 2) ?
296 _base64ToAsciiC[(bits >> 6) & 0x3F] : _paddingChar);
297 sb.append(_paddingChar);
298 } else {
299 if (outputBytes == 2) {
300 sb.append(_base64ToAsciiC[(bits >> 6) & 0x3F]);
301 }
302 }
303 }
304
305 public byte encodeBase64BitsAsByte(int value)
306 {
307 // As with above, assuming it is 6-bit value
308 return _base64ToAsciiB[value];
309 }
310
311 /**
312 * Method that encodes given right-aligned (LSB) 24-bit value
313 * into 4 base64 bytes (ascii), stored in given result buffer.
314 */
315 public int encodeBase64Chunk(int b24, byte[] buffer, int ptr)
316 {
317 buffer[ptr++] = _base64ToAsciiB[(b24 >> 18) & 0x3F];
318 buffer[ptr++] = _base64ToAsciiB[(b24 >> 12) & 0x3F];
319 buffer[ptr++] = _base64ToAsciiB[(b24 >> 6) & 0x3F];
320 buffer[ptr++] = _base64ToAsciiB[b24 & 0x3F];
321 return ptr;
322 }
323
324 /**
325 * Method that outputs partial chunk (which only encodes one
326 * or two bytes of data). Data given is still aligned same as if
327 * it as full data; that is, missing data is at the "right end"
328 * (LSB) of int.
329 *
330 * @param outputBytes Number of encoded bytes included (either 1 or 2)
331 */
332 public int encodeBase64Partial(int bits, int outputBytes, byte[] buffer, int outPtr)
333 {
334 buffer[outPtr++] = _base64ToAsciiB[(bits >> 18) & 0x3F];
335 buffer[outPtr++] = _base64ToAsciiB[(bits >> 12) & 0x3F];
336 if (_usesPadding) {
337 byte pb = (byte) _paddingChar;
338 buffer[outPtr++] = (outputBytes == 2) ?
339 _base64ToAsciiB[(bits >> 6) & 0x3F] : pb;
340 buffer[outPtr++] = pb;
341 } else {
342 if (outputBytes == 2) {
343 buffer[outPtr++] = _base64ToAsciiB[(bits >> 6) & 0x3F];
344 }
345 }
346 return outPtr;
347 }
348
349 /*
350 /**********************************************************
351 /* Convenience conversion methods for String to/from bytes
352 /* use case.
353 /**********************************************************
354 */
355
356 /**
357 * Convenience method for converting given byte array as base64 encoded
358 * String using this variant's settings.
359 * Resulting value is "raw", that is, not enclosed in double-quotes.
360 *
361 * @param input Byte array to encode
362 */
363 public String encode(byte[] input)
364 {
365 return encode(input, false);
366 }
367
368 /**
369 * Convenience method for converting given byte array as base64 encoded String
370 * using this variant's settings, optionally enclosed in double-quotes.
371 * Linefeeds added, if needed, are expressed as 2-character JSON (and Java source)
372 * escape sequence of backslash + `n`.
373 *
374 * @param input Byte array to encode
375 * @param addQuotes Whether to surround resulting value in double quotes or not
376 */
377 public String encode(byte[] input, boolean addQuotes)
378 {
379 final int inputEnd = input.length;
380 final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3));
381 if (addQuotes) {
382 sb.append('"');
383 }
384
385 int chunksBeforeLF = getMaxLineLength() >> 2;
386
387 // Ok, first we loop through all full triplets of data:
388 int inputPtr = 0;
389 int safeInputEnd = inputEnd-3; // to get only full triplets
390
391 while (inputPtr <= safeInputEnd) {
392 // First, mash 3 bytes into lsb of 32-bit int
393 int b24 = ((int) input[inputPtr++]) << 8;
394 b24 |= ((int) input[inputPtr++]) & 0xFF;
395 b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF);
396 encodeBase64Chunk(sb, b24);
397 if (--chunksBeforeLF <= 0) {
398 // note: must quote in JSON value, so not really useful...
399 sb.append('\\');
400 sb.append('n');
401 chunksBeforeLF = getMaxLineLength() >> 2;
402 }
403 }
404
405 // And then we may have 1 or 2 leftover bytes to encode
406 int inputLeft = inputEnd - inputPtr; // 0, 1 or 2
407 if (inputLeft > 0) { // yes, but do we have room for output?
408 int b24 = ((int) input[inputPtr++]) << 16;
409 if (inputLeft == 2) {
410 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8;
411 }
412 encodeBase64Partial(sb, b24, inputLeft);
413 }
414
415 if (addQuotes) {
416 sb.append('"');
417 }
418 return sb.toString();
419 }
420
421 /**
422 * Convenience method for converting given byte array as base64 encoded String
423 * using this variant's settings, optionally enclosed in double-quotes.
424 * Linefeed character to use is passed explicitly.
425 *
426 * @param input Byte array to encode
427 * @param addQuotes Whether to surround resulting value in double quotes or not
428 *
429 * @since 2.10
430 */
431 public String encode(byte[] input, boolean addQuotes, String linefeed)
432 {
433 final int inputEnd = input.length;
434 final StringBuilder sb = new StringBuilder(inputEnd + (inputEnd >> 2) + (inputEnd >> 3));
435 if (addQuotes) {
436 sb.append('"');
437 }
438
439 int chunksBeforeLF = getMaxLineLength() >> 2;
440
441 int inputPtr = 0;
442 int safeInputEnd = inputEnd-3;
443
444 while (inputPtr <= safeInputEnd) {
445 int b24 = ((int) input[inputPtr++]) << 8;
446 b24 |= ((int) input[inputPtr++]) & 0xFF;
447 b24 = (b24 << 8) | (((int) input[inputPtr++]) & 0xFF);
448 encodeBase64Chunk(sb, b24);
449 if (--chunksBeforeLF <= 0) {
450 sb.append(linefeed);
451 chunksBeforeLF = getMaxLineLength() >> 2;
452 }
453 }
454 int inputLeft = inputEnd - inputPtr;
455 if (inputLeft > 0) {
456 int b24 = ((int) input[inputPtr++]) << 16;
457 if (inputLeft == 2) {
458 b24 |= (((int) input[inputPtr++]) & 0xFF) << 8;
459 }
460 encodeBase64Partial(sb, b24, inputLeft);
461 }
462
463 if (addQuotes) {
464 sb.append('"');
465 }
466 return sb.toString();
467 }
468
469 /**
470 * Convenience method for decoding contents of a Base64-encoded String,
471 * using this variant's settings.
472 *
473 * @param input
474 *
475 * @since 2.3
476 *
477 * @throws IllegalArgumentException if input is not valid base64 encoded data
478 */
479 @SuppressWarnings("resource")
480 public byte[] decode(String input) throws IllegalArgumentException
481 {
482 ByteArrayBuilder b = new ByteArrayBuilder();
483 decode(input, b);
484 return b.toByteArray();
485 }
486
487 /**
488 * Convenience method for decoding contents of a Base64-encoded String,
489 * using this variant's settings
490 * and appending decoded binary data using provided {@link ByteArrayBuilder}.
491 *<p>
492 * NOTE: builder will NOT be reset before decoding (nor cleared afterwards);
493 * assumption is that caller will ensure it is given in proper state, and
494 * used as appropriate afterwards.
495 *
496 * @since 2.3
497 *
498 * @throws IllegalArgumentException if input is not valid base64 encoded data
499 */
500 public void decode(String str, ByteArrayBuilder builder) throws IllegalArgumentException
501 {
502 int ptr = 0;
503 int len = str.length();
504
505 main_loop:
506 while (true) {
507 // first, we'll skip preceding white space, if any
508 char ch;
509 do {
510 if (ptr >= len) {
511 break main_loop;
512 }
513 ch = str.charAt(ptr++);
514 } while (ch <= INT_SPACE);
515 int bits = decodeBase64Char(ch);
516 if (bits < 0) {
517 _reportInvalidBase64(ch, 0, null);
518 }
519 int decodedData = bits;
520 // then second base64 char; can't get padding yet, nor ws
521 if (ptr >= len) {
522 _reportBase64EOF();
523 }
524 ch = str.charAt(ptr++);
525 bits = decodeBase64Char(ch);
526 if (bits < 0) {
527 _reportInvalidBase64(ch, 1, null);
528 }
529 decodedData = (decodedData << 6) | bits;
530 // third base64 char; can be padding, but not ws
531 if (ptr >= len) {
532 // but as per [JACKSON-631] can be end-of-input, iff not using padding
533 if (!usesPadding()) {
534 decodedData >>= 4;
535 builder.append(decodedData);
536 break;
537 }
538 _reportBase64EOF();
539 }
540 ch = str.charAt(ptr++);
541 bits = decodeBase64Char(ch);
542
543 // First branch: can get padding (-> 1 byte)
544 if (bits < 0) {
545 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
546 _reportInvalidBase64(ch, 2, null);
547 }
548 // Ok, must get padding
549 if (ptr >= len) {
550 _reportBase64EOF();
551 }
552 ch = str.charAt(ptr++);
553 if (!usesPaddingChar(ch)) {
554 _reportInvalidBase64(ch, 3, "expected padding character '"+getPaddingChar()+"'");
555 }
556 // Got 12 bits, only need 8, need to shift
557 decodedData >>= 4;
558 builder.append(decodedData);
559 continue;
560 }
561 // Nope, 2 or 3 bytes
562 decodedData = (decodedData << 6) | bits;
563 // fourth and last base64 char; can be padding, but not ws
564 if (ptr >= len) {
565 // but as per [JACKSON-631] can be end-of-input, iff not using padding
566 if (!usesPadding()) {
567 decodedData >>= 2;
568 builder.appendTwoBytes(decodedData);
569 break;
570 }
571 _reportBase64EOF();
572 }
573 ch = str.charAt(ptr++);
574 bits = decodeBase64Char(ch);
575 if (bits < 0) {
576 if (bits != Base64Variant.BASE64_VALUE_PADDING) {
577 _reportInvalidBase64(ch, 3, null);
578 }
579 decodedData >>= 2;
580 builder.appendTwoBytes(decodedData);
581 } else {
582 // otherwise, our triple is now complete
583 decodedData = (decodedData << 6) | bits;
584 builder.appendThreeBytes(decodedData);
585 }
586 }
587 }
588
589 /*
590 /**********************************************************
591 /* Overridden standard methods
592 /**********************************************************
593 */
594
595 @Override
596 public String toString() { return _name; }
597
598 @Override
599 public boolean equals(Object o) {
600 // identity comparison should be dine
601 return (o == this);
602 }
603
604 @Override
605 public int hashCode() {
606 return _name.hashCode();
607 }
608
609 /*
610 /**********************************************************
611 /* Internal helper methods
612 /**********************************************************
613 */
614
615 /**
616 * @param bindex Relative index within base64 character unit; between 0
617 * and 3 (as unit has exactly 4 characters)
618 */
619 protected void _reportInvalidBase64(char ch, int bindex, String msg)
620 throws IllegalArgumentException
621 {
622 String base;
623 if (ch <= INT_SPACE) {
624 base = "Illegal white space character (code 0x"+Integer.toHexString(ch)+") as character #"+(bindex+1)+" of 4-char base64 unit: can only used between units";
625 } else if (usesPaddingChar(ch)) {
626 base = "Unexpected padding character ('"+getPaddingChar()+"') as character #"+(bindex+1)+" of 4-char base64 unit: padding only legal as 3rd or 4th character";
627 } else if (!Character.isDefined(ch) || Character.isISOControl(ch)) {
628 // Not sure if we can really get here... ? (most illegal xml chars are caught at lower level)
629 base = "Illegal character (code 0x"+Integer.toHexString(ch)+") in base64 content";
630 } else {
631 base = "Illegal character '"+ch+"' (code 0x"+Integer.toHexString(ch)+") in base64 content";
632 }
633 if (msg != null) {
634 base = base + ": " + msg;
635 }
636 throw new IllegalArgumentException(base);
637 }
638
639 protected void _reportBase64EOF() throws IllegalArgumentException {
640 throw new IllegalArgumentException(missingPaddingMessage());
641 }
642
643 /**
644 * Helper method that will construct a message to use in exceptions for cases where input ends
645 * prematurely in place where padding would be expected.
646 *
647 * @since 2.10
648 */
649 public String missingPaddingMessage() {
650 return String.format("Unexpected end of base64-encoded String: base64 variant '%s' expects padding (one or more '%c' characters) at the end",
651 getName(), getPaddingChar());
652 }
653
654 }
655
656