1 /*
2 * Copyright (C) 2015 Square, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package okhttp3;
17
18 import java.net.InetAddress;
19 import java.net.MalformedURLException;
20 import java.net.URI;
21 import java.net.URISyntaxException;
22 import java.net.URL;
23 import java.nio.charset.Charset;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.LinkedHashSet;
27 import java.util.List;
28 import java.util.Set;
29 import javax.annotation.Nullable;
30 import okhttp3.internal.Util;
31 import okhttp3.internal.publicsuffix.PublicSuffixDatabase;
32 import okio.Buffer;
33
34 import static java.nio.charset.StandardCharsets.UTF_8;
35 import static okhttp3.internal.Util.decodeHexDigit;
36 import static okhttp3.internal.Util.delimiterOffset;
37 import static okhttp3.internal.Util.skipLeadingAsciiWhitespace;
38 import static okhttp3.internal.Util.skipTrailingAsciiWhitespace;
39 import static okhttp3.internal.Util.verifyAsIpAddress;
40
41 /**
42 * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this
43 * class to compose and decompose Internet addresses. For example, this code will compose and print
44 * a URL for Google search: <pre> {@code
45 *
46 * HttpUrl url = new HttpUrl.Builder()
47 * .scheme("https")
48 * .host("www.google.com")
49 * .addPathSegment("search")
50 * .addQueryParameter("q", "polar bears")
51 * .build();
52 * System.out.println(url);
53 * }</pre>
54 *
55 * which prints: <pre> {@code
56 *
57 * https://www.google.com/search?q=polar%20bears
58 * }</pre>
59 *
60 * As another example, this code prints the human-readable query parameters of a Twitter search:
61 * <pre> {@code
62 *
63 * HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");
64 * for (int i = 0, size = url.querySize(); i < size; i++) {
65 * System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));
66 * }
67 * }</pre>
68 *
69 * which prints: <pre> {@code
70 *
71 * q: cute #puppies
72 * f: images
73 * }</pre>
74 *
75 * In addition to composing URLs from their component parts and decomposing URLs into their
76 * component parts, this class implements relative URL resolution: what address you'd reach by
77 * clicking a relative link on a specified page. For example: <pre> {@code
78 *
79 * HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");
80 * HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");
81 * System.out.println(link);
82 * }</pre>
83 *
84 * which prints: <pre> {@code
85 *
86 * https://www.youtube.com/watch?v=cbP2N1BQdYc
87 * }</pre>
88 *
89 * <h3>What's in a URL?</h3>
90 *
91 * A URL has several components.
92 *
93 * <h4>Scheme</h4>
94 *
 * <p>Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be
96 * used to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file},
97 * {@code ftp}), this class only supports {@code http} and {@code https}. Use {@link URI
98 * java.net.URI} for URLs with arbitrary schemes.
99 *
100 * <h4>Username and Password</h4>
101 *
102 * <p>Username and password are either present, or the empty string {@code ""} if absent. This class
 * offers no mechanism to differentiate empty from absent. Neither of these components is popular
104 * in practice. Typically HTTP applications use other mechanisms for user identification and
105 * authentication.
106 *
107 * <h4>Host</h4>
108 *
109 * <p>The host identifies the webserver that serves the URL's resource. It is either a hostname like
110 * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6
111 * address like {@code ::1}.
112 *
113 * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered
114 * domain names, and even {@code localhost} when connecting from the server itself. Each of a
115 * webserver's names is a distinct URL and they are not interchangeable. For example, even if {@code
116 * http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by the same
117 * IP address, the two URLs identify different resources.
118 *
119 * <h4>Port</h4>
120 *
121 * <p>The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS.
122 * This class never returns -1 for the port: if no port is explicitly specified in the URL then the
123 * scheme's default is used.
124 *
125 * <h4>Path</h4>
126 *
127 * <p>The path identifies a specific resource on the host. Paths have a hierarchical structure like
128 * "/square/okhttp/issues/1486" and decompose into a list of segments like ["square", "okhttp",
129 * "issues", "1486"].
130 *
131 * <p>This class offers methods to compose and decompose paths by segment. It composes each path
132 * from a list of segments by alternating between "/" and the encoded segment. For example the
133 * segments ["a", "b"] build "/a/b" and the segments ["a", "b", ""] build "/a/b/".
134 *
135 * <p>If a path's last segment is the empty string then the path ends with "/". This class always
136 * builds non-empty paths: if the path is omitted it defaults to "/". The default path's segment
137 * list is a single empty string: [""].
138 *
139 * <h4>Query</h4>
140 *
141 * <p>The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query
142 * string is subdivided into a collection of name-value parameters. This class offers methods to set
143 * the query as the single string, or as individual name-value parameters. With name-value
144 * parameters the values are optional and names may be repeated.
145 *
146 * <h4>Fragment</h4>
147 *
148 * <p>The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and
149 * query the fragment is not sent to the webserver: it's private to the client.
150 *
151 * <h3>Encoding</h3>
152 *
153 * <p>Each component must be encoded before it is embedded in the complete URL. As we saw above, the
154 * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query
155 * parameter value.
156 *
157 * <h4>Percent encoding</h4>
158 *
159 * <p>Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes
160 * (like {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters,
161 * non-ASCII characters, and characters that already have another meaning in a particular context.
162 *
163 * <p>Percent encoding is used in every URL component except for the hostname. But the set of
164 * characters that need to be encoded is different for each component. For example, the path
165 * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the
166 * start of the URL's query. But within the query and fragment components, the {@code ?} character
167 * doesn't delimit anything and doesn't need to be escaped. <pre> {@code
168 *
169 * HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()
170 * .addPathSegment("_Who?_")
171 * .query("_Who?_")
172 * .fragment("_Who?_")
173 * .build();
174 * System.out.println(url);
175 * }</pre>
176 *
177 * This prints: <pre> {@code
178 *
179 * http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_
180 * }</pre>
181 *
182 * When parsing URLs that lack percent encoding where it is required, this class will percent encode
183 * the offending characters.
184 *
185 * <h4>IDNA Mapping and Punycode encoding</h4>
186 *
187 * <p>Hostnames have different requirements and use a different encoding scheme. It consists of IDNA
188 * mapping and Punycode encoding.
189 *
190 * <p>In order to avoid confusion and discourage phishing attacks, <a
191 * href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid
192 * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM}
193 * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the
194 * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}.
195 * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for
196 * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are
197 * not mapped and cannot be used in a hostname.
198 *
199 * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII
200 * string to make international domain names work everywhere. For example, "σ" encodes as "xn--4xa".
201 * The encoded string is not human readable, but can be used with classes like {@link InetAddress}
202 * to establish connections.
203 *
204 * <h3>Why another URL model?</h3>
205 *
206 * <p>Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL
207 * model to address problems that the others don't.
208 *
209 * <h4>Different URLs should be different</h4>
210 *
211 * <p>Although they have different content, {@code java.net.URL} considers the following two URLs
212 * equal, and the {@link Object#equals equals()} method between them returns true:
213 *
214 * <ul>
215 * <li>http://square.github.io/
216 * <li>http://google.github.io/
217 * </ul>
218 *
219 * This is because those two hosts share the same IP address. This is an old, bad design decision
220 * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link
221 * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may
222 * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are
223 * hosted.
224 *
225 * <h4>Equal URLs should be equal</h4>
226 *
227 * <p>These two URLs are semantically identical, but {@code java.net.URI} disagrees:
228 *
229 * <ul>
230 * <li>http://host:80/
231 * <li>http://host
232 * </ul>
233 *
234 * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /})
235 * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any
236 * application that stores information-per-URL will need to either canonicalize manually, or suffer
237 * unnecessary redundancy for such URLs.
238 *
239 * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use
240 * securely. Suppose you're building a webservice that checks that incoming paths are prefixed
241 * "/static/images/" before serving the corresponding assets from the filesystem. <pre> {@code
242 *
243 * String attack = "http://example.com/static/images/../../../../../etc/passwd";
244 * System.out.println(new URL(attack).getPath());
245 * System.out.println(new URI(attack).getPath());
246 * System.out.println(HttpUrl.parse(attack).encodedPath());
247 * }</pre>
248 *
 * By not canonicalizing the input paths, {@code URL} and {@code URI} are complicit in directory
 * traversal attacks. Code that checks only the path prefix may suffer!
251 * <pre> {@code
252 *
253 * /static/images/../../../../../etc/passwd
254 * /static/images/../../../../../etc/passwd
255 * /etc/passwd
256 * }</pre>
257 *
258 * <h4>If it works on the web, it should work in your application</h4>
259 *
260 * <p>The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like
261 * "http://example.com/abc|def" because the '|' character is unsupported. This class is more
262 * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".
 * This forgiving behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with
264 * major web browsers over consistency with obsolete specifications.
265 *
266 * <h4>Paths and Queries should decompose</h4>
267 *
268 * <p>Neither of the built-in URL models offer direct access to path segments or query parameters.
269 * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'
270 * characters get silently replaced with spaces? If a query parameter contains a '&', does that
271 * get escaped? By offering methods to read and write individual query parameters directly,
272 * application developers are saved from the hassles of encoding and decoding.
273 *
274 * <h4>Plus a modern API</h4>
275 *
276 * <p>The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping
277 * constructors. For example, there's no API to compose a URI with a custom port without also
278 * providing a query and fragment.
279 *
280 * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With
281 * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and
282 * path but no hostname. Building APIs that consume such malformed values is difficult!
283 *
284 * <p>This class has a modern API. It avoids punitive checked exceptions: {@link #get get()}
285 * throws {@link IllegalArgumentException} on invalid input or {@link #parse parse()}
286 * returns null if the input is an invalid URL. You can even be explicit about whether each
287 * component has been encoded already.
288 */
289 public final class HttpUrl {
290 private static final char[] HEX_DIGITS =
291 {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
292 static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
293 static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
294 static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
295 static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
296 static final String QUERY_ENCODE_SET = " \"'<>#";
297 static final String QUERY_COMPONENT_REENCODE_SET = " \"'<>#&=";
298 static final String QUERY_COMPONENT_ENCODE_SET = " !\"#$&'(),/:;<=>?@[]\\^`{|}~";
299 static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
300 static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
301 static final String FRAGMENT_ENCODE_SET = "";
302 static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";
303
304 /** Either "http" or "https". */
305 final String scheme;
306
307 /** Decoded username. */
308 private final String username;
309
310 /** Decoded password. */
311 private final String password;
312
313 /** Canonical hostname. */
314 final String host;
315
316 /** Either 80, 443 or a user-specified port. In range [1..65535]. */
317 final int port;
318
319 /**
320 * A list of canonical path segments. This list always contains at least one element, which may be
321 * the empty string. Each segment is formatted with a leading '/', so if path segments were ["a",
322 * "b", ""], then the encoded path would be "/a/b/".
323 */
324 private final List<String> pathSegments;
325
326 /**
327 * Alternating, decoded query names and values, or null for no query. Names may be empty or
328 * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
329 * empty, or non-empty.
330 */
331 private final @Nullable List<String> queryNamesAndValues;
332
333 /** Decoded fragment. */
334 private final @Nullable String fragment;
335
336 /** Canonical URL. */
337 private final String url;
338
HttpUrl(Builder builder) {
  this.scheme = builder.scheme;

  // The builder holds user info in encoded form; this class keeps the decoded form.
  this.username = percentDecode(builder.encodedUsername, false);
  this.password = percentDecode(builder.encodedPassword, false);

  this.host = builder.host;
  this.port = builder.effectivePort();
  this.pathSegments = percentDecode(builder.encodedPathSegments, false);

  // Query and fragment are optional: a null on the builder stays null here.
  if (builder.encodedQueryNamesAndValues == null) {
    this.queryNamesAndValues = null;
  } else {
    // Only the query is decoded with the second argument set to true.
    this.queryNamesAndValues = percentDecode(builder.encodedQueryNamesAndValues, true);
  }

  if (builder.encodedFragment == null) {
    this.fragment = null;
  } else {
    this.fragment = percentDecode(builder.encodedFragment, false);
  }

  // The canonical string form is whatever the builder renders.
  this.url = builder.toString();
}
354
355 /** Returns this URL as a {@link URL java.net.URL}. */
356 public URL url() {
357 try {
358 return new URL(url);
359 } catch (MalformedURLException e) {
360 throw new RuntimeException(e); // Unexpected!
361 }
362 }
363
364 /**
365 * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this
366 * class, the returned URI may be semantically different from this URL:
367 *
368 * <ul>
369 * <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.
370 * <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.
371 * <li>Whitespace and control characters in the fragment will be stripped.
372 * </ul>
373 *
374 * <p>These differences may have a significant consequence when the URI is interpreted by a
375 * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.
376 */
377 public URI uri() {
378 String uri = newBuilder().reencodeForUri().toString();
379 try {
380 return new URI(uri);
381 } catch (URISyntaxException e) {
382 // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.
383 try {
384 String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");
385 return URI.create(stripped);
386 } catch (Exception e1) {
387 throw new RuntimeException(e); // Unexpected!
388 }
389 }
390 }
391
392 /** Returns either "http" or "https". */
393 public String scheme() {
394 return scheme;
395 }
396
397 public boolean isHttps() {
398 return scheme.equals("https");
399 }
400
401 /**
402 * Returns the username, or an empty string if none is set.
403 *
404 * <p><table summary="">
405 * <tr><th>URL</th><th>{@code encodedUsername()}</th></tr>
406 * <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>
407 * <tr><td>{@code http://username@host/}</td><td>{@code "username"}</td></tr>
408 * <tr><td>{@code http://username:password@host/}</td><td>{@code "username"}</td></tr>
409 * <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "a%20b"}</td></tr>
410 * </table>
411 */
412 public String encodedUsername() {
413 if (username.isEmpty()) return "";
414 int usernameStart = scheme.length() + 3; // "://".length() == 3.
415 int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@");
416 return url.substring(usernameStart, usernameEnd);
417 }
418
419 /**
420 * Returns the decoded username, or an empty string if none is present.
421 *
422 * <p><table summary="">
423 * <tr><th>URL</th><th>{@code username()}</th></tr>
424 * <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>
425 * <tr><td>{@code http://username@host/}</td><td>{@code "username"}</td></tr>
426 * <tr><td>{@code http://username:password@host/}</td><td>{@code "username"}</td></tr>
427 * <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "a b"}</td></tr>
428 * </table>
429 */
430 public String username() {
431 return username;
432 }
433
434 /**
435 * Returns the password, or an empty string if none is set.
436 *
437 * <p><table summary="">
438 * <tr><th>URL</th><th>{@code encodedPassword()}</th></tr>
439 * <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>
440 * <tr><td>{@code http://username@host/}</td><td>{@code ""}</td></tr>
441 * <tr><td>{@code http://username:password@host/}</td><td>{@code "password"}</td></tr>
442 * <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "c%20d"}</td></tr>
443 * </table>
444 */
445 public String encodedPassword() {
446 if (password.isEmpty()) return "";
447 int passwordStart = url.indexOf(':', scheme.length() + 3) + 1;
448 int passwordEnd = url.indexOf('@');
449 return url.substring(passwordStart, passwordEnd);
450 }
451
452 /**
453 * Returns the decoded password, or an empty string if none is present.
454 *
455 * <p><table summary="">
456 * <tr><th>URL</th><th>{@code password()}</th></tr>
457 * <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>
458 * <tr><td>{@code http://username@host/}</td><td>{@code ""}</td></tr>
459 * <tr><td>{@code http://username:password@host/}</td><td>{@code "password"}</td></tr>
460 * <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "c d"}</td></tr>
461 * </table>
462 */
463 public String password() {
464 return password;
465 }
466
467 /**
468 * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May
469 * be:
470 *
471 * <ul>
472 * <li>A regular host name, like {@code android.com}.
473 * <li>An IPv4 address, like {@code 127.0.0.1}.
474 * <li>An IPv6 address, like {@code ::1}. Note that there are no square braces.
475 * <li>An encoded IDN, like {@code xn--n3h.net}.
476 * </ul>
477 *
478 * <p><table summary="">
479 * <tr><th>URL</th><th>{@code host()}</th></tr>
480 * <tr><td>{@code http://android.com/}</td><td>{@code "android.com"}</td></tr>
481 * <tr><td>{@code http://127.0.0.1/}</td><td>{@code "127.0.0.1"}</td></tr>
482 * <tr><td>{@code http://[::1]/}</td><td>{@code "::1"}</td></tr>
483 * <tr><td>{@code http://xn--n3h.net/}</td><td>{@code "xn--n3h.net"}</td></tr>
484 * </table>
485 */
486 public String host() {
487 return host;
488 }
489
490 /**
491 * Returns the explicitly-specified port if one was provided, or the default port for this URL's
492 * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code
493 * https://square.com/}. The result is in {@code [1..65535]}.
494 *
495 * <p><table summary="">
496 * <tr><th>URL</th><th>{@code port()}</th></tr>
497 * <tr><td>{@code http://host/}</td><td>{@code 80}</td></tr>
498 * <tr><td>{@code http://host:8000/}</td><td>{@code 8000}</td></tr>
499 * <tr><td>{@code https://host/}</td><td>{@code 443}</td></tr>
500 * </table>
501 */
502 public int port() {
503 return port;
504 }
505
506 /**
507 * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1
508 * otherwise.
509 */
510 public static int defaultPort(String scheme) {
511 if (scheme.equals("http")) {
512 return 80;
513 } else if (scheme.equals("https")) {
514 return 443;
515 } else {
516 return -1;
517 }
518 }
519
520 /**
521 * Returns the number of segments in this URL's path. This is also the number of slashes in the
522 * URL's path, like 3 in {@code http://host/a/b/c}. This is always at least 1.
523 *
524 * <p><table summary="">
525 * <tr><th>URL</th><th>{@code pathSize()}</th></tr>
526 * <tr><td>{@code http://host/}</td><td>{@code 1}</td></tr>
527 * <tr><td>{@code http://host/a/b/c}</td><td>{@code 3}</td></tr>
528 * <tr><td>{@code http://host/a/b/c/}</td><td>{@code 4}</td></tr>
529 * </table>
530 */
531 public int pathSize() {
532 return pathSegments.size();
533 }
534
535 /**
536 * Returns the entire path of this URL encoded for use in HTTP resource resolution. The returned
537 * path will start with {@code "/"}.
538 *
539 * <p><table summary="">
540 * <tr><th>URL</th><th>{@code encodedPath()}</th></tr>
541 * <tr><td>{@code http://host/}</td><td>{@code "/"}</td></tr>
542 * <tr><td>{@code http://host/a/b/c}</td><td>{@code "/a/b/c"}</td></tr>
543 * <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code "/a/b%20c/d"}</td></tr>
544 * </table>
545 */
546 public String encodedPath() {
547 int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3.
548 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
549 return url.substring(pathStart, pathEnd);
550 }
551
552 static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) {
553 for (int i = 0, size = pathSegments.size(); i < size; i++) {
554 out.append('/');
555 out.append(pathSegments.get(i));
556 }
557 }
558
559 /**
560 * Returns a list of encoded path segments like {@code ["a", "b", "c"]} for the URL {@code
561 * http://host/a/b/c}. This list is never empty though it may contain a single empty string.
562 *
563 * <p><table summary="">
564 * <tr><th>URL</th><th>{@code encodedPathSegments()}</th></tr>
565 * <tr><td>{@code http://host/}</td><td>{@code [""]}</td></tr>
566 * <tr><td>{@code http://host/a/b/c}</td><td>{@code ["a", "b", "c"]}</td></tr>
567 * <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code ["a", "b%20c", "d"]}</td></tr>
568 * </table>
569 */
570 public List<String> encodedPathSegments() {
571 int pathStart = url.indexOf('/', scheme.length() + 3);
572 int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");
573 List<String> result = new ArrayList<>();
574 for (int i = pathStart; i < pathEnd; ) {
575 i++; // Skip the '/'.
576 int segmentEnd = delimiterOffset(url, i, pathEnd, '/');
577 result.add(url.substring(i, segmentEnd));
578 i = segmentEnd;
579 }
580 return result;
581 }
582
583 /**
584 * Returns a list of path segments like {@code ["a", "b", "c"]} for the URL {@code
585 * http://host/a/b/c}. This list is never empty though it may contain a single empty string.
586 *
587 * <p><table summary="">
588 * <tr><th>URL</th><th>{@code pathSegments()}</th></tr>
589 * <tr><td>{@code http://host/}</td><td>{@code [""]}</td></tr>
 * <tr><td>{@code http://host/a/b/c}</td><td>{@code ["a", "b", "c"]}</td></tr>
 * <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code ["a", "b c", "d"]}</td></tr>
592 * </table>
593 */
594 public List<String> pathSegments() {
595 return pathSegments;
596 }
597
598 /**
599 * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string
600 * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all
601 * other URLs).
602 *
603 * <p><table summary="">
604 * <tr><th>URL</th><th>{@code encodedQuery()}</th></tr>
605 * <tr><td>{@code http://host/}</td><td>null</td></tr>
606 * <tr><td>{@code http://host/?}</td><td>{@code ""}</td></tr>
607 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code
608 * "a=apple&k=key+lime"}</td></tr>
609 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a=apple&a=apricot"}</td></tr>
610 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a=apple&b"}</td></tr>
611 * </table>
612 */
613 public @Nullable String encodedQuery() {
614 if (queryNamesAndValues == null) return null; // No query.
615 int queryStart = url.indexOf('?') + 1;
616 int queryEnd = delimiterOffset(url, queryStart, url.length(), '#');
617 return url.substring(queryStart, queryEnd);
618 }
619
620 static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) {
621 for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {
622 String name = namesAndValues.get(i);
623 String value = namesAndValues.get(i + 1);
624 if (i > 0) out.append('&');
625 out.append(name);
626 if (value != null) {
627 out.append('=');
628 out.append(value);
629 }
630 }
631 }
632
633 /**
634 * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a query
635 * string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject", "math",
636 * "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain '='
637 * characters.
638 */
639 static List<String> queryStringToNamesAndValues(String encodedQuery) {
640 List<String> result = new ArrayList<>();
641 for (int pos = 0; pos <= encodedQuery.length(); ) {
642 int ampersandOffset = encodedQuery.indexOf('&', pos);
643 if (ampersandOffset == -1) ampersandOffset = encodedQuery.length();
644
645 int equalsOffset = encodedQuery.indexOf('=', pos);
646 if (equalsOffset == -1 || equalsOffset > ampersandOffset) {
647 result.add(encodedQuery.substring(pos, ampersandOffset));
648 result.add(null); // No value for this name.
649 } else {
650 result.add(encodedQuery.substring(pos, equalsOffset));
651 result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset));
652 }
653 pos = ampersandOffset + 1;
654 }
655 return result;
656 }
657
658 /**
659 * Returns this URL's query, like {@code "abc"} for {@code http://host/?abc}. Most callers should
660 * prefer {@link #queryParameterName} and {@link #queryParameterValue} because these methods offer
661 * direct access to individual query parameters.
662 *
663 * <p><table summary="">
664 * <tr><th>URL</th><th>{@code query()}</th></tr>
665 * <tr><td>{@code http://host/}</td><td>null</td></tr>
666 * <tr><td>{@code http://host/?}</td><td>{@code ""}</td></tr>
667 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "a=apple&k=key
668 * lime"}</td></tr>
669 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a=apple&a=apricot"}</td></tr>
670 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a=apple&b"}</td></tr>
671 * </table>
672 */
673 public @Nullable String query() {
674 if (queryNamesAndValues == null) return null; // No query.
675 StringBuilder result = new StringBuilder();
676 namesAndValuesToQueryString(result, queryNamesAndValues);
677 return result.toString();
678 }
679
680 /**
681 * Returns the number of query parameters in this URL, like 2 for {@code
682 * http://host/?a=apple&b=banana}. If this URL has no query this returns 0. Otherwise it returns
683 * one more than the number of {@code "&"} separators in the query.
684 *
685 * <p><table summary="">
686 * <tr><th>URL</th><th>{@code querySize()}</th></tr>
687 * <tr><td>{@code http://host/}</td><td>{@code 0}</td></tr>
688 * <tr><td>{@code http://host/?}</td><td>{@code 1}</td></tr>
689 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code 2}</td></tr>
690 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code 2}</td></tr>
691 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code 2}</td></tr>
692 * </table>
693 */
694 public int querySize() {
695 return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0;
696 }
697
698 /**
699 * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is
700 * no such query parameter.
701 *
702 * <p><table summary="">
703 * <tr><th>URL</th><th>{@code queryParameter("a")}</th></tr>
704 * <tr><td>{@code http://host/}</td><td>null</td></tr>
705 * <tr><td>{@code http://host/?}</td><td>null</td></tr>
706 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "apple"}</td></tr>
707 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "apple"}</td></tr>
708 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "apple"}</td></tr>
709 * </table>
710 */
711 public @Nullable String queryParameter(String name) {
712 if (queryNamesAndValues == null) return null;
713 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
714 if (name.equals(queryNamesAndValues.get(i))) {
715 return queryNamesAndValues.get(i + 1);
716 }
717 }
718 return null;
719 }
720
721 /**
722 * Returns the distinct query parameter names in this URL, like {@code ["a", "b"]} for {@code
723 * http://host/?a=apple&b=banana}. If this URL has no query this returns the empty set.
724 *
725 * <p><table summary="">
726 * <tr><th>URL</th><th>{@code queryParameterNames()}</th></tr>
727 * <tr><td>{@code http://host/}</td><td>{@code []}</td></tr>
728 * <tr><td>{@code http://host/?}</td><td>{@code [""]}</td></tr>
729 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code ["a", "k"]}</td></tr>
730 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code ["a"]}</td></tr>
731 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code ["a", "b"]}</td></tr>
732 * </table>
733 */
734 public Set<String> queryParameterNames() {
735 if (queryNamesAndValues == null) return Collections.emptySet();
736 Set<String> result = new LinkedHashSet<>();
737 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
738 result.add(queryNamesAndValues.get(i));
739 }
740 return Collections.unmodifiableSet(result);
741 }
742
743 /**
744 * Returns all values for the query parameter {@code name} ordered by their appearance in this
745 * URL. For example this returns {@code ["banana"]} for {@code queryParameterValue("b")} on {@code
746 * http://host/?a=apple&b=banana}.
747 *
748 * <p><table summary="">
749 * <tr><th>URL</th><th>{@code queryParameterValues("a")}</th><th>{@code
750 * queryParameterValues("b")}</th></tr>
751 * <tr><td>{@code http://host/}</td><td>{@code []}</td><td>{@code []}</td></tr>
752 * <tr><td>{@code http://host/?}</td><td>{@code []}</td><td>{@code []}</td></tr>
753 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code ["apple"]}</td><td>{@code
754 * []}</td></tr>
755 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code ["apple",
756 * "apricot"]}</td><td>{@code []}</td></tr>
757 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code ["apple"]}</td><td>{@code
758 * [null]}</td></tr>
759 * </table>
760 */
761 public List<String> queryParameterValues(String name) {
762 if (queryNamesAndValues == null) return Collections.emptyList();
763 List<String> result = new ArrayList<>();
764 for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {
765 if (name.equals(queryNamesAndValues.get(i))) {
766 result.add(queryNamesAndValues.get(i + 1));
767 }
768 }
769 return Collections.unmodifiableList(result);
770 }
771
772 /**
773 * Returns the name of the query parameter at {@code index}. For example this returns {@code "a"}
774 * for {@code queryParameterName(0)} on {@code http://host/?a=apple&b=banana}. This throws if
775 * {@code index} is not less than the {@linkplain #querySize query size}.
776 *
777 * <p><table summary="">
778 * <tr><th>URL</th><th>{@code queryParameterName(0)}</th><th>{@code
779 * queryParameterName(1)}</th></tr>
780 * <tr><td>{@code http://host/}</td><td>exception</td><td>exception</td></tr>
781 * <tr><td>{@code http://host/?}</td><td>{@code ""}</td><td>exception</td></tr>
782 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "a"}</td><td>{@code
783 * "k"}</td></tr>
784 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a"}</td><td>{@code
785 * "a"}</td></tr>
786 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a"}</td><td>{@code "b"}</td></tr>
787 * </table>
788 */
789 public String queryParameterName(int index) {
790 if (queryNamesAndValues == null) throw new IndexOutOfBoundsException();
791 return queryNamesAndValues.get(index * 2);
792 }
793
794 /**
795 * Returns the value of the query parameter at {@code index}. For example this returns {@code
796 * "apple"} for {@code queryParameterName(0)} on {@code http://host/?a=apple&b=banana}. This
797 * throws if {@code index} is not less than the {@linkplain #querySize query size}.
798 *
799 * <p><table summary="">
800 * <tr><th>URL</th><th>{@code queryParameterValue(0)}</th><th>{@code
801 * queryParameterValue(1)}</th></tr>
802 * <tr><td>{@code http://host/}</td><td>exception</td><td>exception</td></tr>
803 * <tr><td>{@code http://host/?}</td><td>null</td><td>exception</td></tr>
804 * <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "apple"}</td><td>{@code
805 * "key lime"}</td></tr>
806 * <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "apple"}</td><td>{@code
807 * "apricot"}</td></tr>
808 * <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "apple"}</td><td>null</td></tr>
809 * </table>
810 */
811 public String queryParameterValue(int index) {
812 if (queryNamesAndValues == null) throw new IndexOutOfBoundsException();
813 return queryNamesAndValues.get(index * 2 + 1);
814 }
815
816 /**
817 * Returns this URL's encoded fragment, like {@code "abc"} for {@code http://host/#abc}. This
818 * returns null if the URL has no fragment.
819 *
820 * <p><table summary="">
821 * <tr><th>URL</th><th>{@code encodedFragment()}</th></tr>
822 * <tr><td>{@code http://host/}</td><td>null</td></tr>
823 * <tr><td>{@code http://host/#}</td><td>{@code ""}</td></tr>
824 * <tr><td>{@code http://host/#abc}</td><td>{@code "abc"}</td></tr>
825 * <tr><td>{@code http://host/#abc|def}</td><td>{@code "abc|def"}</td></tr>
826 * </table>
827 */
828 public @Nullable String encodedFragment() {
829 if (fragment == null) return null;
830 int fragmentStart = url.indexOf('#') + 1;
831 return url.substring(fragmentStart);
832 }
833
834 /**
835 * Returns this URL's fragment, like {@code "abc"} for {@code http://host/#abc}. This returns null
836 * if the URL has no fragment.
837 *
838 * <p><table summary="">
839 * <tr><th>URL</th><th>{@code fragment()}</th></tr>
840 * <tr><td>{@code http://host/}</td><td>null</td></tr>
841 * <tr><td>{@code http://host/#}</td><td>{@code ""}</td></tr>
842 * <tr><td>{@code http://host/#abc}</td><td>{@code "abc"}</td></tr>
843 * <tr><td>{@code http://host/#abc|def}</td><td>{@code "abc|def"}</td></tr>
844 * </table>
845 */
846 public @Nullable String fragment() {
847 return fragment;
848 }
849
850 /**
851 * Returns a string with containing this URL with its username, password, query, and fragment
852 * stripped, and its path replaced with {@code /...}. For example, redacting {@code
853 * http://username:password@example.com/path} returns {@code http://example.com/...}.
854 */
855 public String redact() {
856 return newBuilder("/...")
857 .username("")
858 .password("")
859 .build()
860 .toString();
861 }
862
863 /**
864 * Returns the URL that would be retrieved by following {@code link} from this URL, or null if
865 * the resulting URL is not well-formed.
866 */
867 public @Nullable HttpUrl resolve(String link) {
868 Builder builder = newBuilder(link);
869 return builder != null ? builder.build() : null;
870 }
871
872 public Builder newBuilder() {
873 Builder result = new Builder();
874 result.scheme = scheme;
875 result.encodedUsername = encodedUsername();
876 result.encodedPassword = encodedPassword();
877 result.host = host;
878 // If we're set to a default port, unset it in case of a scheme change.
879 result.port = port != defaultPort(scheme) ? port : -1;
880 result.encodedPathSegments.clear();
881 result.encodedPathSegments.addAll(encodedPathSegments());
882 result.encodedQuery(encodedQuery());
883 result.encodedFragment = encodedFragment();
884 return result;
885 }
886
887 /**
888 * Returns a builder for the URL that would be retrieved by following {@code link} from this URL,
889 * or null if the resulting URL is not well-formed.
890 */
891 public @Nullable Builder newBuilder(String link) {
892 try {
893 return new Builder().parse(this, link);
894 } catch (IllegalArgumentException ignored) {
895 return null;
896 }
897 }
898
899 /**
900 * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS
901 * URL, or null if it isn't.
902 */
903 public static @Nullable HttpUrl parse(String url) {
904 try {
905 return get(url);
906 } catch (IllegalArgumentException ignored) {
907 return null;
908 }
909 }
910
911 /**
912 * Returns a new {@code HttpUrl} representing {@code url}.
913 *
914 * @throws IllegalArgumentException If {@code url} is not a well-formed HTTP or HTTPS URL.
915 */
916 public static HttpUrl get(String url) {
917 return new Builder().parse(null, url).build();
918 }
919
920 /**
921 * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or
922 * null if it has any other protocol.
923 */
924 public static @Nullable HttpUrl get(URL url) {
925 return parse(url.toString());
926 }
927
928 public static @Nullable HttpUrl get(URI uri) {
929 return parse(uri.toString());
930 }
931
932 @Override public boolean equals(@Nullable Object other) {
933 return other instanceof HttpUrl && ((HttpUrl) other).url.equals(url);
934 }
935
936 @Override public int hashCode() {
937 return url.hashCode();
938 }
939
940 @Override public String toString() {
941 return url;
942 }
943
944 /**
945 * Returns the domain name of this URL's {@link #host()} that is one level beneath the public
946 * suffix by consulting the <a href="https://publicsuffix.org">public suffix list</a>. Returns
947 * null if this URL's {@link #host()} is an IP address or is considered a public suffix by the
948 * public suffix list.
949 *
950 * <p>In general this method <strong>should not</strong> be used to test whether a domain is valid
951 * or routable. Instead, DNS is the recommended source for that information.
952 *
953 * <p><table summary="">
954 * <tr><th>URL</th><th>{@code topPrivateDomain()}</th></tr>
955 * <tr><td>{@code http://google.com}</td><td>{@code "google.com"}</td></tr>
956 * <tr><td>{@code http://adwords.google.co.uk}</td><td>{@code "google.co.uk"}</td></tr>
957 * <tr><td>{@code http://square}</td><td>null</td></tr>
958 * <tr><td>{@code http://co.uk}</td><td>null</td></tr>
959 * <tr><td>{@code http://localhost}</td><td>null</td></tr>
960 * <tr><td>{@code http://127.0.0.1}</td><td>null</td></tr>
961 * </table>
962 */
963 public @Nullable String topPrivateDomain() {
964 if (verifyAsIpAddress(host)) return null;
965 return PublicSuffixDatabase.get().getEffectiveTldPlusOne(host);
966 }
967
968 public static final class Builder {
969 @Nullable String scheme;
970 String encodedUsername = "";
971 String encodedPassword = "";
972 @Nullable String host;
973 int port = -1;
974 final List<String> encodedPathSegments = new ArrayList<>();
975 @Nullable List<String> encodedQueryNamesAndValues;
976 @Nullable String encodedFragment;
977
978 public Builder() {
979 encodedPathSegments.add(""); // The default path is '/' which needs a trailing space.
980 }
981
982 public Builder scheme(String scheme) {
983 if (scheme == null) {
984 throw new NullPointerException("scheme == null");
985 } else if (scheme.equalsIgnoreCase("http")) {
986 this.scheme = "http";
987 } else if (scheme.equalsIgnoreCase("https")) {
988 this.scheme = "https";
989 } else {
990 throw new IllegalArgumentException("unexpected scheme: " + scheme);
991 }
992 return this;
993 }
994
995 public Builder username(String username) {
996 if (username == null) throw new NullPointerException("username == null");
997 this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);
998 return this;
999 }
1000
1001 public Builder encodedUsername(String encodedUsername) {
1002 if (encodedUsername == null) throw new NullPointerException("encodedUsername == null");
1003 this.encodedUsername = canonicalize(
1004 encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);
1005 return this;
1006 }
1007
1008 public Builder password(String password) {
1009 if (password == null) throw new NullPointerException("password == null");
1010 this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);
1011 return this;
1012 }
1013
1014 public Builder encodedPassword(String encodedPassword) {
1015 if (encodedPassword == null) throw new NullPointerException("encodedPassword == null");
1016 this.encodedPassword = canonicalize(
1017 encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);
1018 return this;
1019 }
1020
1021 /**
1022 * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6
1023 * address.
1024 */
1025 public Builder host(String host) {
1026 if (host == null) throw new NullPointerException("host == null");
1027 String encoded = canonicalizeHost(host, 0, host.length());
1028 if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host);
1029 this.host = encoded;
1030 return this;
1031 }
1032
1033 public Builder port(int port) {
1034 if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port);
1035 this.port = port;
1036 return this;
1037 }
1038
1039 int effectivePort() {
1040 return port != -1 ? port : defaultPort(scheme);
1041 }
1042
1043 public Builder addPathSegment(String pathSegment) {
1044 if (pathSegment == null) throw new NullPointerException("pathSegment == null");
1045 push(pathSegment, 0, pathSegment.length(), false, false);
1046 return this;
1047 }
1048
1049 /**
1050 * Adds a set of path segments separated by a slash (either {@code \} or {@code /}). If
1051 * {@code pathSegments} starts with a slash, the resulting URL will have empty path segment.
1052 */
1053 public Builder addPathSegments(String pathSegments) {
1054 if (pathSegments == null) throw new NullPointerException("pathSegments == null");
1055 return addPathSegments(pathSegments, false);
1056 }
1057
1058 public Builder addEncodedPathSegment(String encodedPathSegment) {
1059 if (encodedPathSegment == null) {
1060 throw new NullPointerException("encodedPathSegment == null");
1061 }
1062 push(encodedPathSegment, 0, encodedPathSegment.length(), false, true);
1063 return this;
1064 }
1065
1066 /**
1067 * Adds a set of encoded path segments separated by a slash (either {@code \} or {@code /}). If
1068 * {@code encodedPathSegments} starts with a slash, the resulting URL will have empty path
1069 * segment.
1070 */
1071 public Builder addEncodedPathSegments(String encodedPathSegments) {
1072 if (encodedPathSegments == null) {
1073 throw new NullPointerException("encodedPathSegments == null");
1074 }
1075 return addPathSegments(encodedPathSegments, true);
1076 }
1077
1078 private Builder addPathSegments(String pathSegments, boolean alreadyEncoded) {
1079 int offset = 0;
1080 do {
1081 int segmentEnd = delimiterOffset(pathSegments, offset, pathSegments.length(), "/\\");
1082 boolean addTrailingSlash = segmentEnd < pathSegments.length();
1083 push(pathSegments, offset, segmentEnd, addTrailingSlash, alreadyEncoded);
1084 offset = segmentEnd + 1;
1085 } while (offset <= pathSegments.length());
1086 return this;
1087 }
1088
1089 public Builder setPathSegment(int index, String pathSegment) {
1090 if (pathSegment == null) throw new NullPointerException("pathSegment == null");
1091 String canonicalPathSegment = canonicalize(pathSegment, 0, pathSegment.length(),
1092 PATH_SEGMENT_ENCODE_SET, false, false, false, true, null);
1093 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
1094 throw new IllegalArgumentException("unexpected path segment: " + pathSegment);
1095 }
1096 encodedPathSegments.set(index, canonicalPathSegment);
1097 return this;
1098 }
1099
1100 public Builder setEncodedPathSegment(int index, String encodedPathSegment) {
1101 if (encodedPathSegment == null) {
1102 throw new NullPointerException("encodedPathSegment == null");
1103 }
1104 String canonicalPathSegment = canonicalize(encodedPathSegment, 0, encodedPathSegment.length(),
1105 PATH_SEGMENT_ENCODE_SET, true, false, false, true, null);
1106 encodedPathSegments.set(index, canonicalPathSegment);
1107 if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {
1108 throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);
1109 }
1110 return this;
1111 }
1112
1113 public Builder removePathSegment(int index) {
1114 encodedPathSegments.remove(index);
1115 if (encodedPathSegments.isEmpty()) {
1116 encodedPathSegments.add(""); // Always leave at least one '/'.
1117 }
1118 return this;
1119 }
1120
1121 public Builder encodedPath(String encodedPath) {
1122 if (encodedPath == null) throw new NullPointerException("encodedPath == null");
1123 if (!encodedPath.startsWith("/")) {
1124 throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath);
1125 }
1126 resolvePath(encodedPath, 0, encodedPath.length());
1127 return this;
1128 }
1129
1130 public Builder query(@Nullable String query) {
1131 this.encodedQueryNamesAndValues = query != null
1132 ? queryStringToNamesAndValues(canonicalize(
1133 query, QUERY_ENCODE_SET, false, false, true, true))
1134 : null;
1135 return this;
1136 }
1137
1138 public Builder encodedQuery(@Nullable String encodedQuery) {
1139 this.encodedQueryNamesAndValues = encodedQuery != null
1140 ? queryStringToNamesAndValues(
1141 canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))
1142 : null;
1143 return this;
1144 }
1145
1146 /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */
1147 public Builder addQueryParameter(String name, @Nullable String value) {
1148 if (name == null) throw new NullPointerException("name == null");
1149 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
1150 encodedQueryNamesAndValues.add(
1151 canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));
1152 encodedQueryNamesAndValues.add(value != null
1153 ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)
1154 : null);
1155 return this;
1156 }
1157
1158 /** Adds the pre-encoded query parameter to this URL's query string. */
1159 public Builder addEncodedQueryParameter(String encodedName, @Nullable String encodedValue) {
1160 if (encodedName == null) throw new NullPointerException("encodedName == null");
1161 if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();
1162 encodedQueryNamesAndValues.add(
1163 canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true));
1164 encodedQueryNamesAndValues.add(encodedValue != null
1165 ? canonicalize(encodedValue, QUERY_COMPONENT_REENCODE_SET, true, false, true, true)
1166 : null);
1167 return this;
1168 }
1169
1170 public Builder setQueryParameter(String name, @Nullable String value) {
1171 removeAllQueryParameters(name);
1172 addQueryParameter(name, value);
1173 return this;
1174 }
1175
1176 public Builder setEncodedQueryParameter(String encodedName, @Nullable String encodedValue) {
1177 removeAllEncodedQueryParameters(encodedName);
1178 addEncodedQueryParameter(encodedName, encodedValue);
1179 return this;
1180 }
1181
1182 public Builder removeAllQueryParameters(String name) {
1183 if (name == null) throw new NullPointerException("name == null");
1184 if (encodedQueryNamesAndValues == null) return this;
1185 String nameToRemove = canonicalize(
1186 name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);
1187 removeAllCanonicalQueryParameters(nameToRemove);
1188 return this;
1189 }
1190
1191 public Builder removeAllEncodedQueryParameters(String encodedName) {
1192 if (encodedName == null) throw new NullPointerException("encodedName == null");
1193 if (encodedQueryNamesAndValues == null) return this;
1194 removeAllCanonicalQueryParameters(
1195 canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true));
1196 return this;
1197 }
1198
1199 private void removeAllCanonicalQueryParameters(String canonicalName) {
1200 for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) {
1201 if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) {
1202 encodedQueryNamesAndValues.remove(i + 1);
1203 encodedQueryNamesAndValues.remove(i);
1204 if (encodedQueryNamesAndValues.isEmpty()) {
1205 encodedQueryNamesAndValues = null;
1206 return;
1207 }
1208 }
1209 }
1210 }
1211
1212 public Builder fragment(@Nullable String fragment) {
1213 this.encodedFragment = fragment != null
1214 ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)
1215 : null;
1216 return this;
1217 }
1218
1219 public Builder encodedFragment(@Nullable String encodedFragment) {
1220 this.encodedFragment = encodedFragment != null
1221 ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)
1222 : null;
1223 return this;
1224 }
1225
1226 /**
1227 * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is
1228 * particularly strict for certain components.
1229 */
1230 Builder reencodeForUri() {
1231 for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {
1232 String pathSegment = encodedPathSegments.get(i);
1233 encodedPathSegments.set(i,
1234 canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));
1235 }
1236 if (encodedQueryNamesAndValues != null) {
1237 for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {
1238 String component = encodedQueryNamesAndValues.get(i);
1239 if (component != null) {
1240 encodedQueryNamesAndValues.set(i,
1241 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));
1242 }
1243 }
1244 }
1245 if (encodedFragment != null) {
1246 encodedFragment = canonicalize(
1247 encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);
1248 }
1249 return this;
1250 }
1251
1252 public HttpUrl build() {
1253 if (scheme == null) throw new IllegalStateException("scheme == null");
1254 if (host == null) throw new IllegalStateException("host == null");
1255 return new HttpUrl(this);
1256 }
1257
1258 @Override public String toString() {
1259 StringBuilder result = new StringBuilder();
1260 if (scheme != null) {
1261 result.append(scheme);
1262 result.append("://");
1263 } else {
1264 result.append("//");
1265 }
1266
1267 if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) {
1268 result.append(encodedUsername);
1269 if (!encodedPassword.isEmpty()) {
1270 result.append(':');
1271 result.append(encodedPassword);
1272 }
1273 result.append('@');
1274 }
1275
1276 if (host != null) {
1277 if (host.indexOf(':') != -1) {
1278 // Host is an IPv6 address.
1279 result.append('[');
1280 result.append(host);
1281 result.append(']');
1282 } else {
1283 result.append(host);
1284 }
1285 }
1286
1287 if (port != -1 || scheme != null) {
1288 int effectivePort = effectivePort();
1289 if (scheme == null || effectivePort != defaultPort(scheme)) {
1290 result.append(':');
1291 result.append(effectivePort);
1292 }
1293 }
1294
1295 pathSegmentsToString(result, encodedPathSegments);
1296
1297 if (encodedQueryNamesAndValues != null) {
1298 result.append('?');
1299 namesAndValuesToQueryString(result, encodedQueryNamesAndValues);
1300 }
1301
1302 if (encodedFragment != null) {
1303 result.append('#');
1304 result.append(encodedFragment);
1305 }
1306
1307 return result.toString();
1308 }
1309
1310 static final String INVALID_HOST = "Invalid URL host";
1311
1312 Builder parse(@Nullable HttpUrl base, String input) {
1313 int pos = skipLeadingAsciiWhitespace(input, 0, input.length());
1314 int limit = skipTrailingAsciiWhitespace(input, pos, input.length());
1315
1316 // Scheme.
1317 int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);
1318 if (schemeDelimiterOffset != -1) {
1319 if (input.regionMatches(true, pos, "https:", 0, 6)) {
1320 this.scheme = "https";
1321 pos += "https:".length();
1322 } else if (input.regionMatches(true, pos, "http:", 0, 5)) {
1323 this.scheme = "http";
1324 pos += "http:".length();
1325 } else {
1326 throw new IllegalArgumentException("Expected URL scheme 'http' or 'https' but was '"
1327 + input.substring(0, schemeDelimiterOffset) + "'");
1328 }
1329 } else if (base != null) {
1330 this.scheme = base.scheme;
1331 } else {
1332 throw new IllegalArgumentException(
1333 "Expected URL scheme 'http' or 'https' but no colon was found");
1334 }
1335
1336 // Authority.
1337 boolean hasUsername = false;
1338 boolean hasPassword = false;
1339 int slashCount = slashCount(input, pos, limit);
1340 if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {
1341 // Read an authority if either:
1342 // * The input starts with 2 or more slashes. These follow the scheme if it exists.
1343 // * The input scheme exists and is different from the base URL's scheme.
1344 //
1345 // The structure of an authority is:
1346 // username:password@host:port
1347 //
1348 // Username, password and port are optional.
1349 // [username[:password]@]host[:port]
1350 pos += slashCount;
1351 authority:
1352 while (true) {
1353 int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");
1354 int c = componentDelimiterOffset != limit
1355 ? input.charAt(componentDelimiterOffset)
1356 : -1;
1357 switch (c) {
1358 case '@':
1359 // User info precedes.
1360 if (!hasPassword) {
1361 int passwordColonOffset = delimiterOffset(
1362 input, pos, componentDelimiterOffset, ':');
1363 String canonicalUsername = canonicalize(input, pos, passwordColonOffset,
1364 USERNAME_ENCODE_SET, true, false, false, true, null);
1365 this.encodedUsername = hasUsername
1366 ? this.encodedUsername + "%40" + canonicalUsername
1367 : canonicalUsername;
1368 if (passwordColonOffset != componentDelimiterOffset) {
1369 hasPassword = true;
1370 this.encodedPassword = canonicalize(input, passwordColonOffset + 1,
1371 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true,
1372 null);
1373 }
1374 hasUsername = true;
1375 } else {
1376 this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,
1377 componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true, null);
1378 }
1379 pos = componentDelimiterOffset + 1;
1380 break;
1381
1382 case -1:
1383 case '/':
1384 case '\\':
1385 case '?':
1386 case '#':
1387 // Host info precedes.
1388 int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);
1389 if (portColonOffset + 1 < componentDelimiterOffset) {
1390 host = canonicalizeHost(input, pos, portColonOffset);
1391 port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);
1392 if (port == -1) {
1393 throw new IllegalArgumentException("Invalid URL port: \""
1394 + input.substring(portColonOffset + 1, componentDelimiterOffset) + '"');
1395 }
1396 } else {
1397 host = canonicalizeHost(input, pos, portColonOffset);
1398 port = defaultPort(scheme);
1399 }
1400 if (host == null) {
1401 throw new IllegalArgumentException(
1402 INVALID_HOST + ": \"" + input.substring(pos, portColonOffset) + '"');
1403 }
1404 pos = componentDelimiterOffset;
1405 break authority;
1406 }
1407 }
1408 } else {
1409 // This is a relative link. Copy over all authority components. Also maybe the path & query.
1410 this.encodedUsername = base.encodedUsername();
1411 this.encodedPassword = base.encodedPassword();
1412 this.host = base.host;
1413 this.port = base.port;
1414 this.encodedPathSegments.clear();
1415 this.encodedPathSegments.addAll(base.encodedPathSegments());
1416 if (pos == limit || input.charAt(pos) == '#') {
1417 encodedQuery(base.encodedQuery());
1418 }
1419 }
1420
1421 // Resolve the relative path.
1422 int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");
1423 resolvePath(input, pos, pathDelimiterOffset);
1424 pos = pathDelimiterOffset;
1425
1426 // Query.
1427 if (pos < limit && input.charAt(pos) == '?') {
1428 int queryDelimiterOffset = delimiterOffset(input, pos, limit, '#');
1429 this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(
1430 input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true, null));
1431 pos = queryDelimiterOffset;
1432 }
1433
1434 // Fragment.
1435 if (pos < limit && input.charAt(pos) == '#') {
1436 this.encodedFragment = canonicalize(
1437 input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false, null);
1438 }
1439
1440 return this;
1441 }
1442
1443 private void resolvePath(String input, int pos, int limit) {
1444 // Read a delimiter.
1445 if (pos == limit) {
1446 // Empty path: keep the base path as-is.
1447 return;
1448 }
1449 char c = input.charAt(pos);
1450 if (c == '/' || c == '\\') {
1451 // Absolute path: reset to the default "/".
1452 encodedPathSegments.clear();
1453 encodedPathSegments.add("");
1454 pos++;
1455 } else {
1456 // Relative path: clear everything after the last '/'.
1457 encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1458 }
1459
1460 // Read path segments.
1461 for (int i = pos; i < limit; ) {
1462 int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");
1463 boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;
1464 push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);
1465 i = pathSegmentDelimiterOffset;
1466 if (segmentHasTrailingSlash) i++;
1467 }
1468 }
1469
1470 /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */
1471 private void push(String input, int pos, int limit, boolean addTrailingSlash,
1472 boolean alreadyEncoded) {
1473 String segment = canonicalize(
1474 input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true, null);
1475 if (isDot(segment)) {
1476 return; // Skip '.' path segments.
1477 }
1478 if (isDotDot(segment)) {
1479 pop();
1480 return;
1481 }
1482 if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) {
1483 encodedPathSegments.set(encodedPathSegments.size() - 1, segment);
1484 } else {
1485 encodedPathSegments.add(segment);
1486 }
1487 if (addTrailingSlash) {
1488 encodedPathSegments.add("");
1489 }
1490 }
1491
1492 private boolean isDot(String input) {
1493 return input.equals(".") || input.equalsIgnoreCase("%2e");
1494 }
1495
1496 private boolean isDotDot(String input) {
1497 return input.equals("..")
1498 || input.equalsIgnoreCase("%2e.")
1499 || input.equalsIgnoreCase(".%2e")
1500 || input.equalsIgnoreCase("%2e%2e");
1501 }
1502
1503 /**
1504 * Removes a path segment. When this method returns the last segment is always "", which means
1505 * the encoded path will have a trailing '/'.
1506 *
1507 * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from ["a",
1508 * "b", "c", ""] to ["a", "b", ""].
1509 *
1510 * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]
1511 * to ["a", "b", ""].
1512 */
1513 private void pop() {
1514 String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1);
1515
1516 // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.
1517 if (removed.isEmpty() && !encodedPathSegments.isEmpty()) {
1518 encodedPathSegments.set(encodedPathSegments.size() - 1, "");
1519 } else {
1520 encodedPathSegments.add("");
1521 }
1522 }
1523
1524 /**
1525 * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if
1526 * {@code input} does not have a scheme that starts at {@code pos}.
1527 */
1528 private static int schemeDelimiterOffset(String input, int pos, int limit) {
1529 if (limit - pos < 2) return -1;
1530
1531 char c0 = input.charAt(pos);
1532 if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.
1533
1534 for (int i = pos + 1; i < limit; i++) {
1535 char c = input.charAt(i);
1536
1537 if ((c >= 'a' && c <= 'z')
1538 || (c >= 'A' && c <= 'Z')
1539 || (c >= '0' && c <= '9')
1540 || c == '+'
1541 || c == '-'
1542 || c == '.') {
1543 continue; // Scheme character. Keep going.
1544 } else if (c == ':') {
1545 return i; // Scheme prefix!
1546 } else {
1547 return -1; // Non-scheme character before the first ':'.
1548 }
1549 }
1550
1551 return -1; // No ':'; doesn't start with a scheme.
1552 }
1553
1554 /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */
1555 private static int slashCount(String input, int pos, int limit) {
1556 int slashCount = 0;
1557 while (pos < limit) {
1558 char c = input.charAt(pos);
1559 if (c == '\\' || c == '/') {
1560 slashCount++;
1561 pos++;
1562 } else {
1563 break;
1564 }
1565 }
1566 return slashCount;
1567 }
1568
1569 /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */
1570 private static int portColonOffset(String input, int pos, int limit) {
1571 for (int i = pos; i < limit; i++) {
1572 switch (input.charAt(i)) {
1573 case '[':
1574 while (++i < limit) {
1575 if (input.charAt(i) == ']') break;
1576 }
1577 break;
1578 case ':':
1579 return i;
1580 }
1581 }
1582 return limit; // No colon.
1583 }
1584
1585 private static @Nullable String canonicalizeHost(String input, int pos, int limit) {
1586 // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've
1587 // checked for IPv6 square braces. But Chrome does it first, and that's more lenient.
1588 String percentDecoded = percentDecode(input, pos, limit, false);
1589 return Util.canonicalizeHost(percentDecoded);
1590 }
1591
1592 private static int parsePort(String input, int pos, int limit) {
1593 try {
1594 // Canonicalize the port string to skip '\n' etc.
1595 String portString = canonicalize(input, pos, limit, "", false, false, false, true, null);
1596 int i = Integer.parseInt(portString);
1597 if (i > 0 && i <= 65535) return i;
1598 return -1;
1599 } catch (NumberFormatException e) {
1600 return -1; // Invalid port.
1601 }
1602 }
1603 }
1604
1605 static String percentDecode(String encoded, boolean plusIsSpace) {
1606 return percentDecode(encoded, 0, encoded.length(), plusIsSpace);
1607 }
1608
1609 private List<String> percentDecode(List<String> list, boolean plusIsSpace) {
1610 int size = list.size();
1611 List<String> result = new ArrayList<>(size);
1612 for (int i = 0; i < size; i++) {
1613 String s = list.get(i);
1614 result.add(s != null ? percentDecode(s, plusIsSpace) : null);
1615 }
1616 return Collections.unmodifiableList(result);
1617 }
1618
1619 static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {
1620 for (int i = pos; i < limit; i++) {
1621 char c = encoded.charAt(i);
1622 if (c == '%' || (c == '+' && plusIsSpace)) {
1623 // Slow path: the character at i requires decoding!
1624 Buffer out = new Buffer();
1625 out.writeUtf8(encoded, pos, i);
1626 percentDecode(out, encoded, i, limit, plusIsSpace);
1627 return out.readUtf8();
1628 }
1629 }
1630
1631 // Fast path: no characters in [pos..limit) required decoding.
1632 return encoded.substring(pos, limit);
1633 }
1634
1635 static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
1636 int codePoint;
1637 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1638 codePoint = encoded.codePointAt(i);
1639 if (codePoint == '%' && i + 2 < limit) {
1640 int d1 = decodeHexDigit(encoded.charAt(i + 1));
1641 int d2 = decodeHexDigit(encoded.charAt(i + 2));
1642 if (d1 != -1 && d2 != -1) {
1643 out.writeByte((d1 << 4) + d2);
1644 i += 2;
1645 continue;
1646 }
1647 } else if (codePoint == '+' && plusIsSpace) {
1648 out.writeByte(' ');
1649 continue;
1650 }
1651 out.writeUtf8CodePoint(codePoint);
1652 }
1653 }
1654
1655 static boolean percentEncoded(String encoded, int pos, int limit) {
1656 return pos + 2 < limit
1657 && encoded.charAt(pos) == '%'
1658 && decodeHexDigit(encoded.charAt(pos + 1)) != -1
1659 && decodeHexDigit(encoded.charAt(pos + 2)) != -1;
1660 }
1661
1662 /**
1663 * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following
1664 * transformations:
1665 * <ul>
1666 * <li>Tabs, newlines, form feeds and carriage returns are skipped.
1667 * <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".
1668 * <li>Characters in {@code encodeSet} are percent-encoded.
1669 * <li>Control characters and non-ASCII characters are percent-encoded.
1670 * <li>All other characters are copied without transformation.
1671 * </ul>
1672 *
1673 * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.
1674 * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.
1675 * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded.
1676 * @param asciiOnly true to encode all non-ASCII codepoints.
1677 * @param charset which charset to use, null equals UTF-8.
1678 */
1679 static String canonicalize(String input, int pos, int limit, String encodeSet,
1680 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,
1681 @Nullable Charset charset) {
1682 int codePoint;
1683 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1684 codePoint = input.codePointAt(i);
1685 if (codePoint < 0x20
1686 || codePoint == 0x7f
1687 || codePoint >= 0x80 && asciiOnly
1688 || encodeSet.indexOf(codePoint) != -1
1689 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))
1690 || codePoint == '+' && plusIsSpace) {
1691 // Slow path: the character at i requires encoding!
1692 Buffer out = new Buffer();
1693 out.writeUtf8(input, pos, i);
1694 canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,
1695 asciiOnly, charset);
1696 return out.readUtf8();
1697 }
1698 }
1699
1700 // Fast path: no characters in [pos..limit) required encoding.
1701 return input.substring(pos, limit);
1702 }
1703
1704 static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,
1705 boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,
1706 @Nullable Charset charset) {
1707 Buffer encodedCharBuffer = null; // Lazily allocated.
1708 int codePoint;
1709 for (int i = pos; i < limit; i += Character.charCount(codePoint)) {
1710 codePoint = input.codePointAt(i);
1711 if (alreadyEncoded
1712 && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {
1713 // Skip this character.
1714 } else if (codePoint == '+' && plusIsSpace) {
1715 // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.
1716 out.writeUtf8(alreadyEncoded ? "+" : "%2B");
1717 } else if (codePoint < 0x20
1718 || codePoint == 0x7f
1719 || codePoint >= 0x80 && asciiOnly
1720 || encodeSet.indexOf(codePoint) != -1
1721 || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {
1722 // Percent encode this character.
1723 if (encodedCharBuffer == null) {
1724 encodedCharBuffer = new Buffer();
1725 }
1726
1727 if (charset == null || charset.equals(UTF_8)) {
1728 encodedCharBuffer.writeUtf8CodePoint(codePoint);
1729 } else {
1730 encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset);
1731 }
1732
1733 while (!encodedCharBuffer.exhausted()) {
1734 int b = encodedCharBuffer.readByte() & 0xff;
1735 out.writeByte('%');
1736 out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);
1737 out.writeByte(HEX_DIGITS[b & 0xf]);
1738 }
1739 } else {
1740 // This character doesn't need encoding. Just copy it over.
1741 out.writeUtf8CodePoint(codePoint);
1742 }
1743 }
1744 }
1745
1746 static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
1747 boolean plusIsSpace, boolean asciiOnly, @Nullable Charset charset) {
1748 return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace,
1749 asciiOnly, charset);
1750 }
1751
1752 static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,
1753 boolean plusIsSpace, boolean asciiOnly) {
1754 return canonicalize(
1755 input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly, null);
1756 }
1757 }
1758