1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
27
28 package com.amazonaws.util;
29
30 import java.nio.ByteBuffer;
31 import java.nio.charset.Charset;
32 import java.util.BitSet;
33 import java.util.List;
34
35 import com.amazonaws.annotation.Immutable;
36
37 // Copied and extracted from httpcomponents-client-4.3.6.
38 /**
39 * A collection of utilities for encoding URLs.
40 *
41 * @since 4.0
42 */
43 @Immutable
44 class URLEncodedUtils {
45 private static final char QP_SEP_A = '&';
46 private static final String NAME_VALUE_SEPARATOR = "=";
47
48 /**
49 * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
50 * list of parameters in an HTTP PUT or HTTP POST.
51 *
52 * @param parameters The parameters to include.
53 * @param charset The encoding to use.
54 * @return An {@code application/x-www-form-urlencoded} string
55 */
56 public static String format(
57 final List <? extends NameValuePair> parameters,
58 final String charset) {
59 return format(parameters, QP_SEP_A, charset);
60 }
61
62 /**
63 * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
64 * list of parameters in an HTTP PUT or HTTP POST.
65 *
66 * @param parameters The parameters to include.
67 * @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}.
68 * @param charset The encoding to use.
69 * @return An {@code application/x-www-form-urlencoded} string
70 *
71 * @since 4.3
72 */
73 public static String format(
74 final List <? extends NameValuePair> parameters,
75 final char parameterSeparator,
76 final String charset) {
77 final StringBuilder result = new StringBuilder();
78 for (final NameValuePair parameter : parameters) {
79 final String encodedName = encodeFormFields(parameter.getName(), charset);
80 final String encodedValue = encodeFormFields(parameter.getValue(), charset);
81 if (result.length() > 0) {
82 result.append(parameterSeparator);
83 }
84 result.append(encodedName);
85 if (encodedValue != null) {
86 result.append(NAME_VALUE_SEPARATOR);
87 result.append(encodedValue);
88 }
89 }
90 return result.toString();
91 }
92
93 /**
94 * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
95 * list of parameters in an HTTP PUT or HTTP POST.
96 *
97 * @param parameters The parameters to include.
98 * @param charset The encoding to use.
99 * @return An {@code application/x-www-form-urlencoded} string
100 *
101 * @since 4.2
102 */
103 public static String format(
104 final Iterable<? extends NameValuePair> parameters,
105 final Charset charset) {
106 return format(parameters, QP_SEP_A, charset);
107 }
108
109 /**
110 * Returns a String that is suitable for use as an {@code application/x-www-form-urlencoded}
111 * list of parameters in an HTTP PUT or HTTP POST.
112 *
113 * @param parameters The parameters to include.
114 * @param parameterSeparator The parameter separator, by convention, {@code '&'} or {@code ';'}.
115 * @param charset The encoding to use.
116 * @return An {@code application/x-www-form-urlencoded} string
117 *
118 * @since 4.3
119 */
120 public static String format(
121 final Iterable<? extends NameValuePair> parameters,
122 final char parameterSeparator,
123 final Charset charset) {
124 final StringBuilder result = new StringBuilder();
125 for (final NameValuePair parameter : parameters) {
126 final String encodedName = encodeFormFields(parameter.getName(), charset);
127 final String encodedValue = encodeFormFields(parameter.getValue(), charset);
128 if (result.length() > 0) {
129 result.append(parameterSeparator);
130 }
131 result.append(encodedName);
132 if (encodedValue != null) {
133 result.append(NAME_VALUE_SEPARATOR);
134 result.append(encodedValue);
135 }
136 }
137 return result.toString();
138 }
139
140 /**
141 * Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *}
142 * <p>
143 * This list is the same as the {@code unreserved} list in
144 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
145 */
146 private static final BitSet UNRESERVED = new BitSet(256);
147 /**
148 * Punctuation characters: , ; : $ & + =
149 * <p>
150 * These are the additional characters allowed by userinfo.
151 */
152 private static final BitSet PUNCT = new BitSet(256);
153 /** Characters which are safe to use in userinfo,
154 * i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */
155 private static final BitSet USERINFO = new BitSet(256);
156 /** Characters which are safe to use in a path,
157 * i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation plus / @ */
158 private static final BitSet PATHSAFE = new BitSet(256);
159 /** Characters which are safe to use in a query or a fragment,
160 * i.e. {@link #RESERVED} plus {@link #UNRESERVED} */
161 private static final BitSet URIC = new BitSet(256);
162
163 /**
164 * Reserved characters, i.e. {@code ;/?:@&=+$,[]}
165 * <p>
166 * This list is the same as the {@code reserved} list in
167 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
168 * as augmented by
169 * <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>
170 */
171 private static final BitSet RESERVED = new BitSet(256);
172
173
174 /**
175 * Safe characters for x-www-form-urlencoded data, as per java.net.URLEncoder and browser behaviour,
176 * i.e. alphanumeric plus {@code "-", "_", ".", "*"}
177 */
178 private static final BitSet URLENCODER = new BitSet(256);
179
180 static {
181 // unreserved chars
182 // alpha characters
183 for (int i = 'a'; i <= 'z'; i++) {
184 UNRESERVED.set(i);
185 }
186 for (int i = 'A'; i <= 'Z'; i++) {
187 UNRESERVED.set(i);
188 }
189 // numeric characters
190 for (int i = '0'; i <= '9'; i++) {
191 UNRESERVED.set(i);
192 }
193 UNRESERVED.set('_'); // these are the charactes of the "mark" list
194 UNRESERVED.set('-');
195 UNRESERVED.set('.');
196 UNRESERVED.set('*');
197 URLENCODER.or(UNRESERVED); // skip remaining unreserved characters
198 UNRESERVED.set('!');
199 UNRESERVED.set('~');
200 UNRESERVED.set('\'');
201 UNRESERVED.set('(');
202 UNRESERVED.set(')');
203 // punct chars
204 PUNCT.set(',');
205 PUNCT.set(';');
206 PUNCT.set(':');
207 PUNCT.set('$');
208 PUNCT.set('&');
209 PUNCT.set('+');
210 PUNCT.set('=');
211 // Safe for userinfo
212 USERINFO.or(UNRESERVED);
213 USERINFO.or(PUNCT);
214
215 // URL path safe
216 PATHSAFE.or(UNRESERVED);
217 PATHSAFE.set('/'); // segment separator
218 PATHSAFE.set(';'); // param separator
219 PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ ,
220 PATHSAFE.set('@');
221 PATHSAFE.set('&');
222 PATHSAFE.set('=');
223 PATHSAFE.set('+');
224 PATHSAFE.set('$');
225 PATHSAFE.set(',');
226
227 RESERVED.set(';');
228 RESERVED.set('/');
229 RESERVED.set('?');
230 RESERVED.set(':');
231 RESERVED.set('@');
232 RESERVED.set('&');
233 RESERVED.set('=');
234 RESERVED.set('+');
235 RESERVED.set('$');
236 RESERVED.set(',');
237 RESERVED.set('['); // added by RFC 2732
238 RESERVED.set(']'); // added by RFC 2732
239
240 URIC.or(RESERVED);
241 URIC.or(UNRESERVED);
242 }
243
244 private static final int RADIX = 16;
245
246 private static String urlEncode(
247 final String content,
248 final Charset charset,
249 final BitSet safechars,
250 final boolean blankAsPlus) {
251 if (content == null) {
252 return null;
253 }
254 final StringBuilder buf = new StringBuilder();
255 final ByteBuffer bb = charset.encode(content);
256 while (bb.hasRemaining()) {
257 final int b = bb.get() & 0xff;
258 if (safechars.get(b)) {
259 buf.append((char) b);
260 } else if (blankAsPlus && b == ' ') {
261 buf.append('+');
262 } else {
263 buf.append("%");
264 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
265 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
266 buf.append(hex1);
267 buf.append(hex2);
268 }
269 }
270 return buf.toString();
271 }
272
273 /**
274 * Encode/escape www-url-form-encoded content.
275 * <p>
276 * Uses the {@link #URLENCODER} set of characters, rather than
277 * the {@link #UNRSERVED} set; this is for compatibilty with previous
278 * releases, URLEncoder.encode() and most browsers.
279 *
280 * @param content the content to encode, will convert space to '+'
281 * @param charset the charset to use
282 * @return encoded string
283 */
284 private static String encodeFormFields(final String content, final String charset) {
285 if (content == null) {
286 return null;
287 }
288 return urlEncode(content, charset != null ? Charset.forName(charset) : StringUtils.UTF8, URLENCODER, true);
289 }
290
291 /**
292 * Encode/escape www-url-form-encoded content.
293 * <p>
294 * Uses the {@link #URLENCODER} set of characters, rather than
295 * the {@link #UNRSERVED} set; this is for compatibilty with previous
296 * releases, URLEncoder.encode() and most browsers.
297 *
298 * @param content the content to encode, will convert space to '+'
299 * @param charset the charset to use
300 * @return encoded string
301 */
302 private static String encodeFormFields (final String content, final Charset charset) {
303 if (content == null) {
304 return null;
305 }
306 return urlEncode(content, charset != null ? charset : StringUtils.UTF8, URLENCODER, true);
307 }
308
309 /**
310 * Encode a String using the {@link #USERINFO} set of characters.
311 * <p>
312 * Used by URIBuilder to encode the userinfo segment.
313 *
314 * @param content the string to encode, does not convert space to '+'
315 * @param charset the charset to use
316 * @return the encoded string
317 */
318 static String encUserInfo(final String content, final Charset charset) {
319 return urlEncode(content, charset, USERINFO, false);
320 }
321
322 /**
323 * Encode a String using the {@link #URIC} set of characters.
324 * <p>
325 * Used by URIBuilder to encode the query and fragment segments.
326 *
327 * @param content the string to encode, does not convert space to '+'
328 * @param charset the charset to use
329 * @return the encoded string
330 */
331 static String encUric(final String content, final Charset charset) {
332 return urlEncode(content, charset, URIC, false);
333 }
334
335 /**
336 * Encode a String using the {@link #PATHSAFE} set of characters.
337 * <p>
338 * Used by URIBuilder to encode path segments.
339 *
340 * @param content the string to encode, does not convert space to '+'
341 * @param charset the charset to use
342 * @return the encoded string
343 */
344 static String encPath(final String content, final Charset charset) {
345 return urlEncode(content, charset, PATHSAFE, false);
346 }
347 }
348