1 /*
2 * JBoss, Home of Professional Open Source.
3 * Copyright 2014 Red Hat, Inc., and individual contributors
4 * as indicated by the @author tags.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package io.undertow.util;
20
21 import java.io.UnsupportedEncodingException;
22 import java.util.regex.Pattern;
23
24 import io.undertow.UndertowMessages;
25 import io.undertow.server.HttpServerExchange;
26
27 /**
28 * Utilities for dealing with URLs
29 *
30 * @author Stuart Douglas
31 * @author Andre Schaefer
32 */
33 public class URLUtils {
34
35 private static final char PATH_SEPARATOR = '/';
36
37 private static final QueryStringParser QUERY_STRING_PARSER = new QueryStringParser('&', false) {
38 @Override
39 void handle(HttpServerExchange exchange, String key, String value) {
40 exchange.addQueryParam(key, value);
41 }
42 };
43 private static final QueryStringParser PATH_PARAM_PARSER = new QueryStringParser(';', true) {
44 @Override
45 void handle(HttpServerExchange exchange, String key, String value) {
46 exchange.addPathParam(key, value);
47 }
48 };
49
50 // RFC-3986 (URI Generic Syntax) states:
51 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
52 // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
53 // "The scheme and path components are required, though the path may be empty (no characters)."
54 private static final Pattern SCHEME_PATTERN = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+-.]*:.*");
55
56 private URLUtils() {
57
58 }
59
60 public static void parseQueryString(final String string, final HttpServerExchange exchange, final String charset, final boolean doDecode, int maxParameters) throws ParameterLimitException {
61 QUERY_STRING_PARSER.parse(string, exchange, charset, doDecode, maxParameters);
62 }
63
64 @Deprecated
65 public static void parsePathParms(final String string, final HttpServerExchange exchange, final String charset, final boolean doDecode, int maxParameters) throws ParameterLimitException {
66 parsePathParams(string, exchange, charset, doDecode, maxParameters);
67 }
68
69 public static int parsePathParams(final String string, final HttpServerExchange exchange, final String charset, final boolean doDecode, int maxParameters) throws ParameterLimitException {
70 return PATH_PARAM_PARSER.parse(string, exchange, charset, doDecode, maxParameters);
71 }
72
73 /**
74 * Decodes a URL. If the decoding fails for any reason then an IllegalArgumentException will be thrown.
75 *
76 * @param s The string to decode
77 * @param enc The encoding
78 * @param decodeSlash If slash characters should be decoded
79 * @param buffer The string builder to use as a buffer.
80 * @return The decoded URL
81 */
82 public static String decode(String s, String enc, boolean decodeSlash, StringBuilder buffer) {
83 return decode(s, enc, decodeSlash, true, buffer);
84 }
85
86 /**
87 * Decodes a URL. If the decoding fails for any reason then an IllegalArgumentException will be thrown.
88 *
89 * @param s The string to decode
90 * @param enc The encoding
91 * @param decodeSlash If slash characters should be decoded
92 * @param buffer The string builder to use as a buffer.
93 * @return The decoded URL
94 */
95 public static String decode(String s, String enc, boolean decodeSlash, boolean formEncoding, StringBuilder buffer) {
96 buffer.setLength(0);
97 boolean needToChange = false;
98 int numChars = s.length();
99 int i = 0;
100
101 while (i < numChars) {
102 char c = s.charAt(i);
103 if (c == '+') {
104 if (formEncoding) {
105 buffer.append(' ');
106 i++;
107 needToChange = true;
108 } else {
109 i++;
110 buffer.append(c);
111 }
112 } else if (c == '%' || c > 127) {
113 /*
114 * Starting with this instance of a character
115 * that needs to be encoded, process all
116 * consecutive substrings of the form %xy. Each
117 * substring %xy will yield a byte. Convert all
118 * consecutive bytes obtained this way to whatever
119 * character(s) they represent in the provided
120 * encoding.
121 *
122 * Note that we need to decode the whole rest of the value, we can't just decode
123 * three characters. For multi code point characters there if the code point can be
124 * represented as an alphanumeric
125 */
126 try {
127 // guess the size of the remaining bytes
128 // of remaining bytes
129 // this works for percent encoded characters,
130 // not so much for unencoded bytes
131 byte[] bytes = new byte[numChars - i + 1];
132
133 int pos = 0;
134
135 while ((i < numChars)) {
136 if (c == '%') {
137 // we need 2 more characters to decode the % construct
138 if ((i + 2) >= s.length()) {
139 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, null);
140 }
141 char p1 = Character.toLowerCase(s.charAt(i + 1));
142 char p2 = Character.toLowerCase(s.charAt(i + 2));
143 if (!decodeSlash && ((p1 == '2' && p2 == 'f') || (p1 == '5' && p2 == 'c'))) {
144 if(pos + 2 >= bytes.length) {
145 bytes = expandBytes(bytes);
146 }
147 bytes[pos++] = (byte) c;
148 // should be copied with preserved upper/lower case
149 bytes[pos++] = (byte) s.charAt(i + 1);
150 bytes[pos++] = (byte) s.charAt(i + 2);
151 i += 3;
152
153 if (i < numChars) {
154 c = s.charAt(i);
155 }
156 continue;
157 }
158 int v = 0;
159 if (p1 >= '0' && p1 <= '9') {
160 v = (p1 - '0') << 4;
161 } else if (p1 >= 'a' && p1 <= 'f') {
162 v = (p1 - 'a' + 10) << 4;
163 } else {
164 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, null);
165 }
166 if (p2 >= '0' && p2 <= '9') {
167 v += (p2 - '0');
168 } else if (p2 >= 'a' && p2 <= 'f') {
169 v += (p2 - 'a' + 10);
170 } else {
171 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, null);
172 }
173 if (v < 0) {
174 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, null);
175 }
176
177 if(pos == bytes.length) {
178 bytes = expandBytes(bytes);
179 }
180 bytes[pos++] = (byte) v;
181 i += 3;
182 if (i < numChars) {
183 c = s.charAt(i);
184 }
185 } else if (c == '+' && formEncoding) {
186 if(pos == bytes.length) {
187 bytes = expandBytes(bytes);
188 }
189 bytes[pos++] = (byte) ' ';
190 ++i;
191 if (i < numChars) {
192 c = s.charAt(i);
193 }
194 } else {
195 if (pos == bytes.length) {
196 bytes = expandBytes(bytes);
197 }
198 ++i;
199 if(c >> 8 != 0) {
200 bytes[pos++] = (byte) (c >> 8);
201 if (pos == bytes.length) {
202 bytes = expandBytes(bytes);
203 }
204 bytes[pos++] = (byte) c;
205 } else {
206 bytes[pos++] = (byte) c;
207 if (i < numChars) {
208 c = s.charAt(i);
209 }
210 }
211
212 }
213 }
214
215 String decoded = new String(bytes, 0, pos, enc);
216 buffer.append(decoded);
217 } catch (NumberFormatException e) {
218 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, e);
219 } catch (UnsupportedEncodingException e) {
220 throw UndertowMessages.MESSAGES.failedToDecodeURL(s, enc, e);
221 }
222 needToChange = true;
223 break;
224 } else {
225 buffer.append(c);
226 i++;
227 }
228 }
229
230 return (needToChange ? buffer.toString() : s);
231 }
232
233 private static byte[] expandBytes(byte[] bytes) {
234 byte[] newBytes = new byte[bytes.length + 10];
235 System.arraycopy(bytes, 0, newBytes, 0, bytes.length);
236 return newBytes;
237 }
238
239 private abstract static class QueryStringParser {
240
241 private final char separator;
242 private final boolean parseUntilSeparator;
243
244 QueryStringParser(final char separator, final boolean parseUntilSeparator) {
245 this.separator = separator;
246 this.parseUntilSeparator = parseUntilSeparator;
247 }
248
249 int parse(final String string, final HttpServerExchange exchange, final String charset, final boolean doDecode, int max) throws ParameterLimitException {
250 int count = 0;
251 int i = 0;
252 try {
253 int stringStart = 0;
254 String attrName = null;
255 for (i = 0; i < string.length(); ++i) {
256 char c = string.charAt(i);
257 if (c == '=' && attrName == null) {
258 attrName = string.substring(stringStart, i);
259 stringStart = i + 1;
260 } else if (c == separator) {
261 if (attrName != null) {
262 handle(exchange, decode(charset, attrName, doDecode), decode(charset, string.substring(stringStart, i), doDecode));
263 if(++count > max) {
264 throw UndertowMessages.MESSAGES.tooManyParameters(max);
265 }
266 } else {
267 handle(exchange, decode(charset, string.substring(stringStart, i), doDecode), "");
268 if(++count > max) {
269 throw UndertowMessages.MESSAGES.tooManyParameters(max);
270 }
271 }
272 stringStart = i + 1;
273 attrName = null;
274 } else if (parseUntilSeparator && (c == '?' || c == '/')) {
275 break;
276 }
277 }
278 if (attrName != null) {
279 handle(exchange, decode(charset, attrName, doDecode), decode(charset, string.substring(stringStart, i), doDecode));
280 if(++count > max) {
281 throw UndertowMessages.MESSAGES.tooManyParameters(max);
282 }
283 } else if (string.length() != stringStart) {
284 handle(exchange, decode(charset, string.substring(stringStart, i), doDecode), "");
285 if(++count > max) {
286 throw UndertowMessages.MESSAGES.tooManyParameters(max);
287 }
288 }
289 } catch (UnsupportedEncodingException e) {
290 throw new RuntimeException(e);
291 }
292 return i;
293 }
294
295 private String decode(String charset, String attrName, final boolean doDecode) throws UnsupportedEncodingException {
296 if (doDecode) {
297 return URLUtils.decode(attrName, charset, true, true, new StringBuilder());
298 }
299 return attrName;
300 }
301
302 abstract void handle(final HttpServerExchange exchange, final String key, final String value);
303 }
304
305
306 /**
307 * Adds a '/' prefix to the beginning of a path if one isn't present
308 * and removes trailing slashes if any are present.
309 *
310 * @param path the path to normalize
311 * @return a normalized (with respect to slashes) result
312 */
313 public static String normalizeSlashes(final String path) {
314 // prepare
315 final StringBuilder builder = new StringBuilder(path);
316 boolean modified = false;
317
318 // remove all trailing '/'s except the first one
319 while (builder.length() > 0 && builder.length() != 1 && PATH_SEPARATOR == builder.charAt(builder.length() - 1)) {
320 builder.deleteCharAt(builder.length() - 1);
321 modified = true;
322 }
323
324 // add a slash at the beginning if one isn't present
325 if (builder.length() == 0 || PATH_SEPARATOR != builder.charAt(0)) {
326 builder.insert(0, PATH_SEPARATOR);
327 modified = true;
328 }
329
330 // only create string when it was modified
331 if (modified) {
332 return builder.toString();
333 }
334
335 return path;
336 }
337
338
339 /**
340 * Test if provided location is an absolute URI or not.
341 *
342 * @param location location to check, null = relative, having scheme = absolute
343 * @return true if location is considered absolute
344 */
345 public static boolean isAbsoluteUrl(String location) {
346 if (location != null && location.length() > 0 && location.contains(":")) {
347 // consider it absolute URL if location contains valid scheme part
348 return SCHEME_PATTERN.matcher(location).matches();
349 }
350 return false;
351 }
352 }
353