Monitoring JavaMelody on _ip-10-0-9-233.ec2.internal

1 /* Copyright (c) 2008 Google Inc.

2  *

3  * Licensed under the Apache License, Version 2.0 (the "License");

4  * you may not use this file except in compliance with the License.

5  * You may obtain a copy of the License at

6  *

7  *     http://www.apache.org/licenses/LICENSE-2.0

8  *

9  * Unless required by applicable law or agreed to in writing, software

10  * distributed under the License is distributed on an "AS IS" BASIS,

11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12  * See the License for the specific language governing permissions and

13  * limitations under the License.

14  */

15 

16 package org.yaml.snakeyaml.external.com.google.gdata.util.common.base;

17 

18 /**

19  * A {@code UnicodeEscaper} that escapes some set of Java characters using the

20  * URI percent encoding scheme. The set of safe characters (those which remain

21  * unescaped) can be specified on construction.

22  * 

23  * <p>

24  * For details on escaping URIs for use in web pages, see section 2.4 of <a

25  * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.

26  * 

27  * <p>

28  * In most cases this class should not need to be used directly. If you have no

29  * special requirements for escaping your URIs, you should use either

30  * {@link CharEscapers#uriEscaper()} or {@link CharEscapers#uriEscaper(boolean)}.

31  * 

32  * <p>

33  * When encoding a String, the following rules apply:

34  * <ul>

35  * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0"

36  * through "9" remain the same.

37  * <li>Any additionally specified safe characters remain the same.

38  * <li>If {@code plusForSpace} was specified, the space character " " is

39  * converted into a plus sign "+".

40  * <li>All other characters are converted into one or more bytes using UTF-8

41  * encoding and each byte is then represented by the 3-character string "%XY",

42  * where "XY" is the two-digit, uppercase, hexadecimal representation of the

43  * byte value.

44  * </ul>

45  * 

46  * <p>

47  * RFC 2396 specifies the set of unreserved characters as "-", "_", ".", "!",

48  * "~", "*", "'", "(" and ")". It goes on to state:

49  * 

50  * <p>

51  * <i>Unreserved characters can be escaped without changing the semantics of the

52  * URI, but this should not be done unless the URI is being used in a context

53  * that does not allow the unescaped character to appear.</i>

54  * 

55  * <p>

56  * For performance reasons the only currently supported character encoding of

57  * this class is UTF-8.

58  * 

59  * <p>

60  * <b>Note</b>: This escaper produces uppercase hexadecimal sequences. From <a

61  * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>:<br>

62  * <i>"URI producers and normalizers should use uppercase hexadecimal digits for

63  * all percent-encodings."</i>

64  * 

65  * 

66  */

67 public class PercentEscaper extends UnicodeEscaper {

68     /**

69      * A string of safe characters that mimics the behavior of

70      * {@link java.net.URLEncoder}.

71      * 

72      */

73     public static final String SAFECHARS_URLENCODER = "-_.*";

74 

75     /**

76      * A string of characters that do not need to be encoded when used in URI

77      * path segments, as specified in RFC 3986. Note that some of these

78      * characters do need to be escaped when used in other parts of the URI.

79      */

80     public static final String SAFEPATHCHARS_URLENCODER = "-_.!~*'()@:$&,;=";

81 

82     /**

83      * A string of characters that do not need to be encoded when used in URI

84      * query strings, as specified in RFC 3986. Note that some of these

85      * characters do need to be escaped when used in other parts of the URI.

86      */

87     public static final String SAFEQUERYSTRINGCHARS_URLENCODER = "-_.!~*'()@:$,;/?:";

88 

89     // In some uri escapers spaces are escaped to '+'

90     private static final char[] URI_ESCAPED_SPACE = { '+' };

91 

92     private static final char[] UPPER_HEX_DIGITS = "0123456789ABCDEF".toCharArray();

93 

94     /**

95      * If true we should convert space to the {@code +} character.

96      */

97     private final boolean plusForSpace;

98 

99     /**

100      * An array of flags where for any {@code char c} if {@code safeOctets[c]}

101      * is true then {@code c} should remain unmodified in the output. If

102      * {@code c > safeOctets.length} then it should be escaped.

103      */

104     private final boolean[] safeOctets;

105 

106     /**

107      * Constructs a URI escaper with the specified safe characters and optional

108      * handling of the space character.

109      * 

110      * @param safeChars

111      *            a non null string specifying additional safe characters for

112      *            this escaper (the ranges 0..9, a..z and A..Z are always safe

113      *            and should not be specified here)

114      * @param plusForSpace

115      *            true if ASCII space should be escaped to {@code +} rather than

116      *            {@code %20}

117      * @throws IllegalArgumentException

118      *             if any of the parameters were invalid

119      */

120     public PercentEscaper(String safeChars, boolean plusForSpace) {

121         // Avoid any misunderstandings about the behavior of this escaper

122         if (safeChars.matches(".*[0-9A-Za-z].*")) {

123             throw new IllegalArgumentException(

124                     "Alphanumeric characters are always 'safe' and should not be "

125                             + "explicitly specified");

126         }

127         // Avoid ambiguous parameters. Safe characters are never modified so if

128         // space is a safe character then setting plusForSpace is meaningless.

129         if (plusForSpace && safeChars.contains(" ")) {

130             throw new IllegalArgumentException(

131                     "plusForSpace cannot be specified when space is a 'safe' character");

132         }

133         if (safeChars.contains("%")) {

134             throw new IllegalArgumentException("The '%' character cannot be specified as 'safe'");

135         }

136         this.plusForSpace = plusForSpace;

137         this.safeOctets = createSafeOctets(safeChars);

138     }

139 

140     /**

141      * Creates a boolean[] with entries corresponding to the character values

142      * for 0-9, A-Z, a-z and those specified in safeChars set to true. The array

143      * is as small as is required to hold the given character information.

144      */

145     private static boolean[] createSafeOctets(String safeChars) {

146         int maxChar = 'z';

147         char[] safeCharArray = safeChars.toCharArray();

148         for (char c : safeCharArray) {

149             maxChar = Math.max(c, maxChar);

150         }

151         boolean[] octets = new boolean[maxChar + 1];

152         for (int c = '0'; c <= '9'; c++) {

153             octets[c] = true;

154         }

155         for (int c = 'A'; c <= 'Z'; c++) {

156             octets[c] = true;

157         }

158         for (int c = 'a'; c <= 'z'; c++) {

159             octets[c] = true;

160         }

161         for (char c : safeCharArray) {

162             octets[c] = true;

163         }

164         return octets;

165     }

166 

167     /*

168      * Overridden for performance. For unescaped strings this improved the

169      * performance of the uri escaper from ~760ns to ~400ns as measured by

170      * {@link CharEscapersBenchmark}.

171      */

172     @Override

173     protected int nextEscapeIndex(CharSequence csq, int index, int end) {

174         for (; index < end; index++) {

175             char c = csq.charAt(index);

176             if (c >= safeOctets.length || !safeOctets[c]) {

177                 break;

178             }

179         }

180         return index;

181     }

182 

183     /*

184      * Overridden for performance. For unescaped strings this improved the

185      * performance of the uri escaper from ~400ns to ~170ns as measured by

186      * {@link CharEscapersBenchmark}.

187      */

188     @Override

189     public String escape(String s) {

190         int slen = s.length();

191         for (int index = 0; index < slen; index++) {

192             char c = s.charAt(index);

193             if (c >= safeOctets.length || !safeOctets[c]) {

194                 return escapeSlow(s, index);

195             }

196         }

197         return s;

198     }

199 

200     /**

201      * Escapes the given Unicode code point in UTF-8.

202      */

203     @Override

204     protected char[] escape(int cp) {

205         // We should never get negative values here but if we do it will throw

206         // an

207         // IndexOutOfBoundsException, so at least it will get spotted.

208         if (cp < safeOctets.length && safeOctets[cp]) {

209             return null;

210         } else if (cp == ' ' && plusForSpace) {

211             return URI_ESCAPED_SPACE;

212         } else if (cp <= 0x7F) {

213             // Single byte UTF-8 characters

214             // Start with "%--" and fill in the blanks

215             char[] dest = new char[3];

216             dest[0] = '%';

217             dest[2] = UPPER_HEX_DIGITS[cp & 0xF];

218             dest[1] = UPPER_HEX_DIGITS[cp >>> 4];

219             return dest;

220         } else if (cp <= 0x7ff) {

221             // Two byte UTF-8 characters [cp >= 0x80 && cp <= 0x7ff]

222             // Start with "%--%--" and fill in the blanks

223             char[] dest = new char[6];

224             dest[0] = '%';

225             dest[3] = '%';

226             dest[5] = UPPER_HEX_DIGITS[cp & 0xF];

227             cp >>>= 4;

228             dest[4] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

229             cp >>>= 2;

230             dest[2] = UPPER_HEX_DIGITS[cp & 0xF];

231             cp >>>= 4;

232             dest[1] = UPPER_HEX_DIGITS[0xC | cp];

233             return dest;

234         } else if (cp <= 0xffff) {

235             // Three byte UTF-8 characters [cp >= 0x800 && cp <= 0xffff]

236             // Start with "%E-%--%--" and fill in the blanks

237             char[] dest = new char[9];

238             dest[0] = '%';

239             dest[1] = 'E';

240             dest[3] = '%';

241             dest[6] = '%';

242             dest[8] = UPPER_HEX_DIGITS[cp & 0xF];

243             cp >>>= 4;

244             dest[7] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

245             cp >>>= 2;

246             dest[5] = UPPER_HEX_DIGITS[cp & 0xF];

247             cp >>>= 4;

248             dest[4] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

249             cp >>>= 2;

250             dest[2] = UPPER_HEX_DIGITS[cp];

251             return dest;

252         } else if (cp <= 0x10ffff) {

253             char[] dest = new char[12];

254             // Four byte UTF-8 characters [cp >= 0xffff && cp <= 0x10ffff]

255             // Start with "%F-%--%--%--" and fill in the blanks

256             dest[0] = '%';

257             dest[1] = 'F';

258             dest[3] = '%';

259             dest[6] = '%';

260             dest[9] = '%';

261             dest[11] = UPPER_HEX_DIGITS[cp & 0xF];

262             cp >>>= 4;

263             dest[10] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

264             cp >>>= 2;

265             dest[8] = UPPER_HEX_DIGITS[cp & 0xF];

266             cp >>>= 4;

267             dest[7] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

268             cp >>>= 2;

269             dest[5] = UPPER_HEX_DIGITS[cp & 0xF];

270             cp >>>= 4;

271             dest[4] = UPPER_HEX_DIGITS[0x8 | (cp & 0x3)];

272             cp >>>= 2;

273             dest[2] = UPPER_HEX_DIGITS[cp & 0x7];

274             return dest;

275         } else {

276             // If this ever happens it is due to bug in UnicodeEscaper, not bad

277             // input.

278             throw new IllegalArgumentException("Invalid unicode character value " + cp);

279         }

280     }

281 }

282