1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 *
21 * This software consists of voluntary contributions made by many
22 * individuals on behalf of the Apache Software Foundation. For more
23 * information on the Apache Software Foundation, please see
24 * <http://www.apache.org/>.
25 *
26 */
27 package org.apache.http.client.utils;
28
29 import java.net.URI;
30 import java.net.URISyntaxException;
31 import java.util.ArrayList;
32 import java.util.EnumSet;
33 import java.util.Iterator;
34 import java.util.List;
35 import java.util.Locale;
36 import java.util.Stack;
37
38 import org.apache.http.HttpHost;
39 import org.apache.http.conn.routing.RouteInfo;
40 import org.apache.http.util.Args;
41 import org.apache.http.util.TextUtils;
42
43 /**
44 * A collection of utilities for {@link URI URIs}, to workaround
45 * bugs within the class or for ease-of-use features.
46 *
47 * @since 4.0
48 */
49 public class URIUtils {
50
51 /**
52 * Flags that control how URI is being rewritten.
53 *
54 * @since 4.5.8
55 */
56 public enum UriFlag {
57 DROP_FRAGMENT,
58 NORMALIZE
59 }
60
61 /**
62 * Empty set of uri flags.
63 *
64 * @since 4.5.8
65 */
66 public static final EnumSet<UriFlag> NO_FLAGS = EnumSet.noneOf(UriFlag.class);
67
68 /**
69 * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT}.
70 *
71 * @since 4.5.8
72 */
73 public static final EnumSet<UriFlag> DROP_FRAGMENT = EnumSet.of(UriFlag.DROP_FRAGMENT);
74
75 /**
76 * Set of uri flags containing {@link UriFlag#NORMALIZE}.
77 *
78 * @since 4.5.8
79 */
80 public static final EnumSet<UriFlag> NORMALIZE = EnumSet.of(UriFlag.NORMALIZE);
81
82 /**
83 * Set of uri flags containing {@link UriFlag#DROP_FRAGMENT} and {@link UriFlag#NORMALIZE}.
84 *
85 * @since 4.5.8
86 */
87 public static final EnumSet<UriFlag> DROP_FRAGMENT_AND_NORMALIZE = EnumSet.of(UriFlag.DROP_FRAGMENT, UriFlag.NORMALIZE);
88
89 /**
90 * Constructs a {@link URI} using all the parameters. This should be
91 * used instead of
92 * {@link URI#URI(String, String, String, int, String, String, String)}
93 * or any of the other URI multi-argument URI constructors.
94 *
95 * @param scheme
96 * Scheme name
97 * @param host
98 * Host name
99 * @param port
100 * Port number
101 * @param path
102 * Path
103 * @param query
104 * Query
105 * @param fragment
106 * Fragment
107 *
108 * @throws URISyntaxException
109 * If both a scheme and a path are given but the path is
110 * relative, if the URI string constructed from the given
111 * components violates RFC 2396, or if the authority
112 * component of the string is present but cannot be parsed
113 * as a server-based authority
114 *
115 * @deprecated (4.2) use {@link URIBuilder}.
116 */
117 @Deprecated
118 public static URI createURI(
119 final String scheme,
120 final String host,
121 final int port,
122 final String path,
123 final String query,
124 final String fragment) throws URISyntaxException {
125 final StringBuilder buffer = new StringBuilder();
126 if (host != null) {
127 if (scheme != null) {
128 buffer.append(scheme);
129 buffer.append("://");
130 }
131 buffer.append(host);
132 if (port > 0) {
133 buffer.append(':');
134 buffer.append(port);
135 }
136 }
137 if (path == null || !path.startsWith("/")) {
138 buffer.append('/');
139 }
140 if (path != null) {
141 buffer.append(path);
142 }
143 if (query != null) {
144 buffer.append('?');
145 buffer.append(query);
146 }
147 if (fragment != null) {
148 buffer.append('#');
149 buffer.append(fragment);
150 }
151 return new URI(buffer.toString());
152 }
153
154 /**
155 * A convenience method for creating a new {@link URI} whose scheme, host
156 * and port are taken from the target host, but whose path, query and
157 * fragment are taken from the existing URI. The fragment is only used if
158 * dropFragment is false. The path is set to "/" if not explicitly specified.
159 *
160 * @param uri
161 * Contains the path, query and fragment to use.
162 * @param target
163 * Contains the scheme, host and port to use.
164 * @param dropFragment
165 * True if the fragment should not be copied.
166 *
167 * @throws URISyntaxException
168 * If the resulting URI is invalid.
169 * @deprecated (4.5.8) Use {@link #rewriteURI(URI, HttpHost, EnumSet)}
170 */
171 @Deprecated
172 public static URI rewriteURI(
173 final URI uri,
174 final HttpHost target,
175 final boolean dropFragment) throws URISyntaxException
176 {
177 return rewriteURI(uri, target, dropFragment ? DROP_FRAGMENT : NO_FLAGS);
178 }
179
180 /**
181 * A convenience method for creating a new {@link URI} whose scheme, host
182 * and port are taken from the target host, but whose path, query and
183 * fragment are taken from the existing URI. What exactly is used and how
184 * is driven by the passed in flags. The path is set to "/" if not explicitly specified.
185 *
186 * @param uri
187 * Contains the path, query and fragment to use.
188 * @param target
189 * Contains the scheme, host and port to use.
190 * @param flags
191 * True if the fragment should not be copied.
192 *
193 * @throws URISyntaxException
194 * If the resulting URI is invalid.
195 * @since 4.5.8
196 */
197 public static URI rewriteURI(
198 final URI uri,
199 final HttpHost target,
200 final EnumSet<UriFlag> flags) throws URISyntaxException {
201 Args.notNull(uri, "URI");
202 Args.notNull(flags, "URI flags");
203 if (uri.isOpaque()) {
204 return uri;
205 }
206 final URIBuilder uribuilder = new URIBuilder(uri);
207 if (target != null) {
208 uribuilder.setScheme(target.getSchemeName());
209 uribuilder.setHost(target.getHostName());
210 uribuilder.setPort(target.getPort());
211 } else {
212 uribuilder.setScheme(null);
213 uribuilder.setHost(null);
214 uribuilder.setPort(-1);
215 }
216 if (flags.contains(UriFlag.DROP_FRAGMENT)) {
217 uribuilder.setFragment(null);
218 }
219 if (flags.contains(UriFlag.NORMALIZE)) {
220 final List<String> originalPathSegments = uribuilder.getPathSegments();
221 final List<String> pathSegments = new ArrayList<String>(originalPathSegments);
222 for (final Iterator<String> it = pathSegments.iterator(); it.hasNext(); ) {
223 final String pathSegment = it.next();
224 if (pathSegment.isEmpty() && it.hasNext()) {
225 it.remove();
226 }
227 }
228 if (pathSegments.size() != originalPathSegments.size()) {
229 uribuilder.setPathSegments(pathSegments);
230 }
231 }
232 if (uribuilder.isPathEmpty()) {
233 uribuilder.setPathSegments("");
234 }
235 return uribuilder.build();
236 }
237
238 /**
239 * A convenience method for
240 * {@link URIUtils#rewriteURI(URI, HttpHost, EnumSet)} that always keeps the
241 * fragment.
242 */
243 public static URI rewriteURI(
244 final URI uri,
245 final HttpHost target) throws URISyntaxException {
246 return rewriteURI(uri, target, NORMALIZE);
247 }
248
249 /**
250 * A convenience method that creates a new {@link URI} whose scheme, host, port, path,
251 * query are taken from the existing URI, dropping any fragment or user-information.
252 * The path is set to "/" if not explicitly specified. The existing URI is returned
253 * unmodified if it has no fragment or user-information and has a path.
254 *
255 * @param uri
256 * original URI.
257 * @throws URISyntaxException
258 * If the resulting URI is invalid.
259 */
260 public static URI rewriteURI(final URI uri) throws URISyntaxException {
261 Args.notNull(uri, "URI");
262 if (uri.isOpaque()) {
263 return uri;
264 }
265 final URIBuilder uribuilder = new URIBuilder(uri);
266 if (uribuilder.getUserInfo() != null) {
267 uribuilder.setUserInfo(null);
268 }
269 if (uribuilder.getPathSegments().isEmpty()) {
270 uribuilder.setPathSegments("");
271 }
272 if (TextUtils.isEmpty(uribuilder.getPath())) {
273 uribuilder.setPath("/");
274 }
275 if (uribuilder.getHost() != null) {
276 uribuilder.setHost(uribuilder.getHost().toLowerCase(Locale.ROOT));
277 }
278 uribuilder.setFragment(null);
279 return uribuilder.build();
280 }
281
282 /**
283 * A convenience method that optionally converts the original {@link java.net.URI} either
284 * to a relative or an absolute form as required by the specified route.
285 *
286 * @param uri
287 * original URI.
288 * @throws URISyntaxException
289 * If the resulting URI is invalid.
290 *
291 * @since 4.4
292 */
293 public static URI rewriteURIForRoute(final URI uri, final RouteInfo route) throws URISyntaxException {
294 return rewriteURIForRoute(uri, route, true);
295 }
296
297 /**
298 * A convenience method that optionally converts the original {@link java.net.URI} either
299 * to a relative or an absolute form as required by the specified route.
300 *
301 * @param uri
302 * original URI.
303 * @throws URISyntaxException
304 * If the resulting URI is invalid.
305 *
306 * @since 4.5.8
307 */
308 public static URI rewriteURIForRoute(final URI uri, final RouteInfo route, final boolean normalizeUri) throws URISyntaxException {
309 if (uri == null) {
310 return null;
311 }
312 if (route.getProxyHost() != null && !route.isTunnelled()) {
313 // Make sure the request URI is absolute
314 return uri.isAbsolute()
315 ? rewriteURI(uri)
316 : rewriteURI(uri, route.getTargetHost(), normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT);
317 }
318 // Make sure the request URI is relative
319 return uri.isAbsolute() ? rewriteURI(uri, null, normalizeUri ? DROP_FRAGMENT_AND_NORMALIZE : DROP_FRAGMENT) : rewriteURI(uri);
320 }
321
322 /**
323 * Resolves a URI reference against a base URI. Work-around for bug in
324 * java.net.URI (http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
325 *
326 * @param baseURI the base URI
327 * @param reference the URI reference
328 * @return the resulting URI
329 */
330 public static URI resolve(final URI baseURI, final String reference) {
331 return resolve(baseURI, URI.create(reference));
332 }
333
334 /**
335 * Resolves a URI reference against a base URI. Work-around for bugs in
336 * java.net.URI (e.g. http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4708535)
337 *
338 * @param baseURI the base URI
339 * @param reference the URI reference
340 * @return the resulting URI
341 */
342 public static URI resolve(final URI baseURI, final URI reference){
343 Args.notNull(baseURI, "Base URI");
344 Args.notNull(reference, "Reference URI");
345 final String s = reference.toASCIIString();
346 if (s.startsWith("?")) {
347 String baseUri = baseURI.toASCIIString();
348 final int i = baseUri.indexOf('?');
349 baseUri = i > -1 ? baseUri.substring(0, i) : baseUri;
350 return URI.create(baseUri + s);
351 }
352 final boolean emptyReference = s.isEmpty();
353 URI resolved;
354 if (emptyReference) {
355 resolved = baseURI.resolve(URI.create("#"));
356 final String resolvedString = resolved.toASCIIString();
357 resolved = URI.create(resolvedString.substring(0, resolvedString.indexOf('#')));
358 } else {
359 resolved = baseURI.resolve(reference);
360 }
361 try {
362 return normalizeSyntax(resolved);
363 } catch (final URISyntaxException ex) {
364 throw new IllegalArgumentException(ex);
365 }
366 }
367
368 /**
369 * Removes dot segments according to RFC 3986, section 5.2.4 and
370 * Syntax-Based Normalization according to RFC 3986, section 6.2.2.
371 *
372 * @param uri the original URI
373 * @return the URI without dot segments
374 *
375 * @since 4.5
376 */
377 public static URI normalizeSyntax(final URI uri) throws URISyntaxException {
378 if (uri.isOpaque() || uri.getAuthority() == null) {
379 // opaque and file: URIs
380 return uri;
381 }
382 final URIBuilder builder = new URIBuilder(uri);
383 final List<String> inputSegments = builder.getPathSegments();
384 final Stack<String> outputSegments = new Stack<String>();
385 for (final String inputSegment : inputSegments) {
386 if (".".equals(inputSegment)) {
387 // Do nothing
388 } else if ("..".equals(inputSegment)) {
389 if (!outputSegments.isEmpty()) {
390 outputSegments.pop();
391 }
392 } else {
393 outputSegments.push(inputSegment);
394 }
395 }
396 if (outputSegments.size() == 0) {
397 outputSegments.add("");
398 }
399 builder.setPathSegments(outputSegments);
400 if (builder.getScheme() != null) {
401 builder.setScheme(builder.getScheme().toLowerCase(Locale.ROOT));
402 }
403 if (builder.getHost() != null) {
404 builder.setHost(builder.getHost().toLowerCase(Locale.ROOT));
405 }
406 return builder.build();
407 }
408
409 /**
410 * Extracts target host from the given {@link URI}.
411 *
412 * @param uri
413 * @return the target host if the URI is absolute or {@code null} if the URI is
414 * relative or does not contain a valid host name.
415 *
416 * @since 4.1
417 */
418 public static HttpHost extractHost(final URI uri) {
419 if (uri == null) {
420 return null;
421 }
422 if (uri.isAbsolute()) {
423 if (uri.getHost() == null) { // normal parse failed; let's do it ourselves
424 // authority does not seem to care about the valid character-set for host names
425 if (uri.getAuthority() != null) {
426 String content = uri.getAuthority();
427 // Strip off any leading user credentials
428 int at = content.indexOf('@');
429 if (at != -1) {
430 content = content.substring(at + 1);
431 }
432 final String scheme = uri.getScheme();
433 final String hostname;
434 final int port;
435 at = content.indexOf(":");
436 if (at != -1) {
437 hostname = content.substring(0, at);
438 try {
439 final String portText = content.substring(at + 1);
440 port = !TextUtils.isEmpty(portText) ? Integer.parseInt(portText) : -1;
441 } catch (final NumberFormatException ex) {
442 return null;
443 }
444 } else {
445 hostname = content;
446 port = -1;
447 }
448 try {
449 return new HttpHost(hostname, port, scheme);
450 } catch (final IllegalArgumentException ex) {
451 return null;
452 }
453 }
454 } else {
455 return new HttpHost(uri.getHost(), uri.getPort(), uri.getScheme());
456 }
457 }
458 return null;
459 }
460
461 /**
462 * Derives the interpreted (absolute) URI that was used to generate the last
463 * request. This is done by extracting the request-uri and target origin for
464 * the last request and scanning all the redirect locations for the last
465 * fragment identifier, then combining the result into a {@link URI}.
466 *
467 * @param originalURI
468 * original request before any redirects
469 * @param target
470 * if the last URI is relative, it is resolved against this target,
471 * or {@code null} if not available.
472 * @param redirects
473 * collection of redirect locations since the original request
474 * or {@code null} if not available.
475 * @return interpreted (absolute) URI
476 */
477 public static URI resolve(
478 final URI originalURI,
479 final HttpHost target,
480 final List<URI> redirects) throws URISyntaxException {
481 Args.notNull(originalURI, "Request URI");
482 final URIBuilder uribuilder;
483 if (redirects == null || redirects.isEmpty()) {
484 uribuilder = new URIBuilder(originalURI);
485 } else {
486 uribuilder = new URIBuilder(redirects.get(redirects.size() - 1));
487 String frag = uribuilder.getFragment();
488 // read interpreted fragment identifier from redirect locations
489 for (int i = redirects.size() - 1; frag == null && i >= 0; i--) {
490 frag = redirects.get(i).getFragment();
491 }
492 uribuilder.setFragment(frag);
493 }
494 // read interpreted fragment identifier from original request
495 if (uribuilder.getFragment() == null) {
496 uribuilder.setFragment(originalURI.getFragment());
497 }
498 // last target origin
499 if (target != null && !uribuilder.isAbsolute()) {
500 uribuilder.setScheme(target.getSchemeName());
501 uribuilder.setHost(target.getHostName());
502 uribuilder.setPort(target.getPort());
503 }
504 return uribuilder.build();
505 }
506
507 /**
508 * This class should not be instantiated.
509 */
510 private URIUtils() {
511 }
512
513 }
514