Monitoring JavaMelody on _ip-10-0-11-13.ec2.internal

1 /*

2  * Copyright (C) 2015 Square, Inc.

3  *

4  * Licensed under the Apache License, Version 2.0 (the "License");

5  * you may not use this file except in compliance with the License.

6  * You may obtain a copy of the License at

7  *

8  *      http://www.apache.org/licenses/LICENSE-2.0

9  *

10  * Unless required by applicable law or agreed to in writing, software

11  * distributed under the License is distributed on an "AS IS" BASIS,

12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

13  * See the License for the specific language governing permissions and

14  * limitations under the License.

15  */

16 package okhttp3;

17 

18 import java.net.InetAddress;

19 import java.net.MalformedURLException;

20 import java.net.URI;

21 import java.net.URISyntaxException;

22 import java.net.URL;

23 import java.nio.charset.Charset;

24 import java.util.ArrayList;

25 import java.util.Collections;

26 import java.util.LinkedHashSet;

27 import java.util.List;

28 import java.util.Set;

29 import javax.annotation.Nullable;

30 import okhttp3.internal.Util;

31 import okhttp3.internal.publicsuffix.PublicSuffixDatabase;

32 import okio.Buffer;

33 

34 import static java.nio.charset.StandardCharsets.UTF_8;

35 import static okhttp3.internal.Util.decodeHexDigit;

36 import static okhttp3.internal.Util.delimiterOffset;

37 import static okhttp3.internal.Util.skipLeadingAsciiWhitespace;

38 import static okhttp3.internal.Util.skipTrailingAsciiWhitespace;

39 import static okhttp3.internal.Util.verifyAsIpAddress;

40 

41 /**

42  * A uniform resource locator (URL) with a scheme of either {@code http} or {@code https}. Use this

43  * class to compose and decompose Internet addresses. For example, this code will compose and print

44  * a URL for Google search: <pre>   {@code

45  *

46  *   HttpUrl url = new HttpUrl.Builder()

47  *       .scheme("https")

48  *       .host("www.google.com")

49  *       .addPathSegment("search")

50  *       .addQueryParameter("q", "polar bears")

51  *       .build();

52  *   System.out.println(url);

53  * }</pre>

54  *

55  * which prints: <pre>   {@code

56  *

57  *     https://www.google.com/search?q=polar%20bears

58  * }</pre>

59  *

60  * As another example, this code prints the human-readable query parameters of a Twitter search:

61  * <pre>   {@code

62  *

63  *   HttpUrl url = HttpUrl.parse("https://twitter.com/search?q=cute%20%23puppies&f=images");

64  *   for (int i = 0, size = url.querySize(); i < size; i++) {

65  *     System.out.println(url.queryParameterName(i) + ": " + url.queryParameterValue(i));

66  *   }

67  * }</pre>

68  *

69  * which prints: <pre>   {@code

70  *

71  *   q: cute #puppies

72  *   f: images

73  * }</pre>

74  *

75  * In addition to composing URLs from their component parts and decomposing URLs into their

76  * component parts, this class implements relative URL resolution: what address you'd reach by

77  * clicking a relative link on a specified page. For example: <pre>   {@code

78  *

79  *   HttpUrl base = HttpUrl.parse("https://www.youtube.com/user/WatchTheDaily/videos");

80  *   HttpUrl link = base.resolve("../../watch?v=cbP2N1BQdYc");

81  *   System.out.println(link);

82  * }</pre>

83  *

84  * which prints: <pre>   {@code

85  *

86  *   https://www.youtube.com/watch?v=cbP2N1BQdYc

87  * }</pre>

88  *

89  * <h3>What's in a URL?</h3>

90  *

91  * A URL has several components.

92  *

93  * <h4>Scheme</h4>

94  *

95  * <p>Sometimes referred to as <i>protocol</i>, a URL's scheme describes what mechanism should be

96  * used to retrieve the resource. Although URLs have many schemes ({@code mailto}, {@code file},

97  * {@code ftp}), this class only supports {@code http} and {@code https}. Use {@link URI

98  * java.net.URI} for URLs with arbitrary schemes.

99  *

100  * <h4>Username and Password</h4>

101  *

102  * <p>Username and password are either present, or the empty string {@code ""} if absent. This class

103  * offers no mechanism to differentiate empty from absent. Neither of these components are popular

104  * in practice. Typically HTTP applications use other mechanisms for user identification and

105  * authentication.

106  *

107  * <h4>Host</h4>

108  *

109  * <p>The host identifies the webserver that serves the URL's resource. It is either a hostname like

110  * {@code square.com} or {@code localhost}, an IPv4 address like {@code 192.168.0.1}, or an IPv6

111  * address like {@code ::1}.

112  *

113  * <p>Usually a webserver is reachable with multiple identifiers: its IP addresses, registered

114  * domain names, and even {@code localhost} when connecting from the server itself. Each of a

115  * webserver's names is a distinct URL and they are not interchangeable. For example, even if {@code

116  * http://square.github.io/dagger} and {@code http://google.github.io/dagger} are served by the same

117  * IP address, the two URLs identify different resources.

118  *

119  * <h4>Port</h4>

120  *

121  * <p>The port used to connect to the webserver. By default this is 80 for HTTP and 443 for HTTPS.

122  * This class never returns -1 for the port: if no port is explicitly specified in the URL then the

123  * scheme's default is used.

124  *

125  * <h4>Path</h4>

126  *

127  * <p>The path identifies a specific resource on the host. Paths have a hierarchical structure like

128  * "/square/okhttp/issues/1486" and decompose into a list of segments like ["square", "okhttp",

129  * "issues", "1486"].

130  *

131  * <p>This class offers methods to compose and decompose paths by segment. It composes each path

132  * from a list of segments by alternating between "/" and the encoded segment. For example the

133  * segments ["a", "b"] build "/a/b" and the segments ["a", "b", ""] build "/a/b/".

134  *

135  * <p>If a path's last segment is the empty string then the path ends with "/". This class always

136  * builds non-empty paths: if the path is omitted it defaults to "/". The default path's segment

137  * list is a single empty string: [""].

138  *

139  * <h4>Query</h4>

140  *

141  * <p>The query is optional: it can be null, empty, or non-empty. For many HTTP URLs the query

142  * string is subdivided into a collection of name-value parameters. This class offers methods to set

143  * the query as the single string, or as individual name-value parameters. With name-value

144  * parameters the values are optional and names may be repeated.

145  *

146  * <h4>Fragment</h4>

147  *

148  * <p>The fragment is optional: it can be null, empty, or non-empty. Unlike host, port, path, and

149  * query the fragment is not sent to the webserver: it's private to the client.

150  *

151  * <h3>Encoding</h3>

152  *

153  * <p>Each component must be encoded before it is embedded in the complete URL. As we saw above, the

154  * string {@code cute #puppies} is encoded as {@code cute%20%23puppies} when used as a query

155  * parameter value.

156  *

157  * <h4>Percent encoding</h4>

158  *

159  * <p>Percent encoding replaces a character (like {@code \ud83c\udf69}) with its UTF-8 hex bytes

160  * (like {@code %F0%9F%8D%A9}). This approach works for whitespace characters, control characters,

161  * non-ASCII characters, and characters that already have another meaning in a particular context.

162  *

163  * <p>Percent encoding is used in every URL component except for the hostname. But the set of

164  * characters that need to be encoded is different for each component. For example, the path

165  * component must escape all of its {@code ?} characters, otherwise it could be interpreted as the

166  * start of the URL's query. But within the query and fragment components, the {@code ?} character

167  * doesn't delimit anything and doesn't need to be escaped. <pre>   {@code

168  *

169  *   HttpUrl url = HttpUrl.parse("http://who-let-the-dogs.out").newBuilder()

170  *       .addPathSegment("_Who?_")

171  *       .query("_Who?_")

172  *       .fragment("_Who?_")

173  *       .build();

174  *   System.out.println(url);

175  * }</pre>

176  *

177  * This prints: <pre>   {@code

178  *

179  *   http://who-let-the-dogs.out/_Who%3F_?_Who?_#_Who?_

180  * }</pre>

181  *

182  * When parsing URLs that lack percent encoding where it is required, this class will percent encode

183  * the offending characters.

184  *

185  * <h4>IDNA Mapping and Punycode encoding</h4>

186  *

187  * <p>Hostnames have different requirements and use a different encoding scheme. It consists of IDNA

188  * mapping and Punycode encoding.

189  *

190  * <p>In order to avoid confusion and discourage phishing attacks, <a

191  * href="http://www.unicode.org/reports/tr46/#ToASCII">IDNA Mapping</a> transforms names to avoid

192  * confusing characters. This includes basic case folding: transforming shouting {@code SQUARE.COM}

193  * into cool and casual {@code square.com}. It also handles more exotic characters. For example, the

194  * Unicode trademark sign (™) could be confused for the letters "TM" in {@code http://ho™mail.com}.

195  * To mitigate this, the single character (™) maps to the string (tm). There is similar policy for

196  * all of the 1.1 million Unicode code points. Note that some code points such as "\ud83c\udf69" are

197  * not mapped and cannot be used in a hostname.

198  *

199  * <p><a href="http://ietf.org/rfc/rfc3492.txt">Punycode</a> converts a Unicode string to an ASCII

200  * string to make international domain names work everywhere. For example, "σ" encodes as "xn--4xa".

201  * The encoded string is not human readable, but can be used with classes like {@link InetAddress}

202  * to establish connections.

203  *

204  * <h3>Why another URL model?</h3>

205  *

206  * <p>Java includes both {@link URL java.net.URL} and {@link URI java.net.URI}. We offer a new URL

207  * model to address problems that the others don't.

208  *

209  * <h4>Different URLs should be different</h4>

210  *

211  * <p>Although they have different content, {@code java.net.URL} considers the following two URLs

212  * equal, and the {@link Object#equals equals()} method between them returns true:

213  *

214  * <ul>

215  *   <li>http://square.github.io/

216  *   <li>http://google.github.io/

217  * </ul>

218  *

219  * This is because those two hosts share the same IP address. This is an old, bad design decision

220  * that makes {@code java.net.URL} unusable for many things. It shouldn't be used as a {@link

221  * java.util.Map Map} key or in a {@link Set}. Doing so is both inefficient because equality may

222  * require a DNS lookup, and incorrect because unequal URLs may be equal because of how they are

223  * hosted.

224  *

225  * <h4>Equal URLs should be equal</h4>

226  *

227  * <p>These two URLs are semantically identical, but {@code java.net.URI} disagrees:

228  *

229  * <ul>

230  *   <li>http://host:80/

231  *   <li>http://host

232  * </ul>

233  *

234  * Both the unnecessary port specification ({@code :80}) and the absent trailing slash ({@code /})

235  * cause URI to bucket the two URLs separately. This harms URI's usefulness in collections. Any

236  * application that stores information-per-URL will need to either canonicalize manually, or suffer

237  * unnecessary redundancy for such URLs.

238  *

239  * <p>Because they don't attempt canonical form, these classes are surprisingly difficult to use

240  * securely. Suppose you're building a webservice that checks that incoming paths are prefixed

241  * "/static/images/" before serving the corresponding assets from the filesystem. <pre>   {@code

242  *

243  *   String attack = "http://example.com/static/images/../../../../../etc/passwd";

244  *   System.out.println(new URL(attack).getPath());

245  *   System.out.println(new URI(attack).getPath());

246  *   System.out.println(HttpUrl.parse(attack).encodedPath());

247  * }</pre>

248  *

249  * By not canonicalizing the input paths, {@code URL} and {@code URI} are complicit in directory traversal attacks. Code that

250  * checks only the path prefix may suffer!

251  * <pre>   {@code

252  *

253  *    /static/images/../../../../../etc/passwd

254  *    /static/images/../../../../../etc/passwd

255  *    /etc/passwd

256  * }</pre>

257  *

258  * <h4>If it works on the web, it should work in your application</h4>

259  *

260  * <p>The {@code java.net.URI} class is strict around what URLs it accepts. It rejects URLs like

261  * "http://example.com/abc|def" because the '|' character is unsupported. This class is more

262  * forgiving: it will automatically percent-encode the '|', yielding "http://example.com/abc%7Cdef".

263  * This kind behavior is consistent with web browsers. {@code HttpUrl} prefers consistency with

264  * major web browsers over consistency with obsolete specifications.

265  *

266  * <h4>Paths and Queries should decompose</h4>

267  *

268  * <p>Neither of the built-in URL models offer direct access to path segments or query parameters.

269  * Manually using {@code StringBuilder} to assemble these components is cumbersome: do '+'

270  * characters get silently replaced with spaces? If a query parameter contains a '&amp;', does that

271  * get escaped? By offering methods to read and write individual query parameters directly,

272  * application developers are saved from the hassles of encoding and decoding.

273  *

274  * <h4>Plus a modern API</h4>

275  *

276  * <p>The URL (JDK1.0) and URI (Java 1.4) classes predate builders and instead use telescoping

277  * constructors. For example, there's no API to compose a URI with a custom port without also

278  * providing a query and fragment.

279  *

280  * <p>Instances of {@link HttpUrl} are well-formed and always have a scheme, host, and path. With

281  * {@code java.net.URL} it's possible to create an awkward URL like {@code http:/} with scheme and

282  * path but no hostname. Building APIs that consume such malformed values is difficult!

283  *

284  * <p>This class has a modern API. It avoids punitive checked exceptions: {@link #get get()}

285  * throws {@link IllegalArgumentException} on invalid input or {@link #parse parse()}

286  * returns null if the input is an invalid URL. You can even be explicit about whether each

287  * component has been encoded already.

288  */

289 public final class HttpUrl {

/** Uppercase hexadecimal digits; the character at index {@code i} is the digit for value i. */
private static final char[] HEX_DIGITS =
    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

// Per-component character sets. The class documentation explains that the set of characters
// that must be percent-encoded differs for each URL component.
// NOTE(review): the code that consumes these sets is outside this view; the role of each
// constant is inferred from its name, so confirm against the callers before relying on it.

// Presumably the characters encoded in the username component.
static final String USERNAME_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
// Same characters as USERNAME_ENCODE_SET.
static final String PASSWORD_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#";
// Presumably the characters encoded in a single path segment; includes '/' and '?'.
static final String PATH_SEGMENT_ENCODE_SET = " \"<>^`{}|/\\?#";
// The *_URI sets are presumably the extra characters handled when re-encoding for
// java.net.URI (see uri()) -- TODO confirm.
static final String PATH_SEGMENT_ENCODE_SET_URI = "[]";
static final String QUERY_ENCODE_SET = " \"'<>#";
// QUERY_ENCODE_SET plus the '&' and '=' separators.
static final String QUERY_COMPONENT_REENCODE_SET = " \"'<>#&=";
static final String QUERY_COMPONENT_ENCODE_SET = " !\"#$&'(),/:;<=>?@[]\\^`{|}~";
static final String QUERY_COMPONENT_ENCODE_SET_URI = "\\^`{|}";
static final String FORM_ENCODE_SET = " \"':;<=>@[]^`{}|/\\?#&!$(),~";
// Intentionally empty: no characters are listed for the fragment.
static final String FRAGMENT_ENCODE_SET = "";
static final String FRAGMENT_ENCODE_SET_URI = " \"#<>\\^`{|}";

/** Either "http" or "https". */
final String scheme;

/** Decoded username. */
private final String username;

/** Decoded password. */
private final String password;

/** Canonical hostname. */
final String host;

/** Either 80, 443 or a user-specified port. In range [1..65535]. */
final int port;

/**
 * A list of canonical path segments. This list always contains at least one element, which may be
 * the empty string. Each segment is formatted with a leading '/', so if path segments were ["a",
 * "b", ""], then the encoded path would be "/a/b/".
 */
private final List<String> pathSegments;

/**
 * Alternating, decoded query names and values, or null for no query. Names may be empty or
 * non-empty, but never null. Values are null if the name has no corresponding '=' separator, or
 * empty, or non-empty.
 */
private final @Nullable List<String> queryNamesAndValues;

/** Decoded fragment. */
private final @Nullable String fragment;

/** Canonical URL. */
private final String url;

338 

/**
 * Creates a URL from the state held by {@code builder}. The username, password, path segments,
 * query and fragment are stored in percent-decoded form, and the canonical string is taken from
 * {@code builder.toString()}.
 */
HttpUrl(Builder builder) {
  this.scheme = builder.scheme;
  this.username = percentDecode(builder.encodedUsername, false);
  this.password = percentDecode(builder.encodedPassword, false);
  this.host = builder.host;
  this.port = builder.effectivePort();
  this.pathSegments = percentDecode(builder.encodedPathSegments, false);

  // A null query or fragment means "absent" and must stay null rather than become empty.
  if (builder.encodedQueryNamesAndValues == null) {
    this.queryNamesAndValues = null;
  } else {
    this.queryNamesAndValues = percentDecode(builder.encodedQueryNamesAndValues, true);
  }

  if (builder.encodedFragment == null) {
    this.fragment = null;
  } else {
    this.fragment = percentDecode(builder.encodedFragment, false);
  }

  this.url = builder.toString();
}

354 

355   /** Returns this URL as a {@link URL java.net.URL}. */

356   public URL url() {

357     try {

358       return new URL(url);

359     } catch (MalformedURLException e) {

360       throw new RuntimeException(e); // Unexpected!

361     }

362   }

363 

364   /**

365    * Returns this URL as a {@link URI java.net.URI}. Because {@code URI} is more strict than this

366    * class, the returned URI may be semantically different from this URL:

367    *

368    * <ul>

369    *     <li>Characters forbidden by URI like {@code [} and {@code |} will be escaped.

370    *     <li>Invalid percent-encoded sequences like {@code %xx} will be encoded like {@code %25xx}.

371    *     <li>Whitespace and control characters in the fragment will be stripped.

372    * </ul>

373    *

374    * <p>These differences may have a significant consequence when the URI is interpreted by a

375    * webserver. For this reason the {@linkplain URI URI class} and this method should be avoided.

376    */

377   public URI uri() {

378     String uri = newBuilder().reencodeForUri().toString();

379     try {

380       return new URI(uri);

381     } catch (URISyntaxException e) {

382       // Unlikely edge case: the URI has a forbidden character in the fragment. Strip it & retry.

383       try {

384         String stripped = uri.replaceAll("[\\u0000-\\u001F\\u007F-\\u009F\\p{javaWhitespace}]", "");

385         return URI.create(stripped);

386       } catch (Exception e1) {

387         throw new RuntimeException(e); // Unexpected!

388       }

389     }

390   }

391 

392   /** Returns either "http" or "https". */

393   public String scheme() {

394     return scheme;

395   }

396 

397   public boolean isHttps() {

398     return scheme.equals("https");

399   }

400 

401   /**

402    * Returns the username, or an empty string if none is set.

403    *

404    * <p><table summary="">

405    *   <tr><th>URL</th><th>{@code encodedUsername()}</th></tr>

406    *   <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>

407    *   <tr><td>{@code http://username@host/}</td><td>{@code "username"}</td></tr>

408    *   <tr><td>{@code http://username:password@host/}</td><td>{@code "username"}</td></tr>

409    *   <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "a%20b"}</td></tr>

410    * </table>

411    */

412   public String encodedUsername() {

413     if (username.isEmpty()) return "";

414     int usernameStart = scheme.length() + 3; // "://".length() == 3.

415     int usernameEnd = delimiterOffset(url, usernameStart, url.length(), ":@");

416     return url.substring(usernameStart, usernameEnd);

417   }

418 

419   /**

420    * Returns the decoded username, or an empty string if none is present.

421    *

422    * <p><table summary="">

423    *   <tr><th>URL</th><th>{@code username()}</th></tr>

424    *   <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>

425    *   <tr><td>{@code http://username@host/}</td><td>{@code "username"}</td></tr>

426    *   <tr><td>{@code http://username:password@host/}</td><td>{@code "username"}</td></tr>

427    *   <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "a b"}</td></tr>

428    * </table>

429    */

430   public String username() {

431     return username;

432   }

433 

434   /**

435    * Returns the password, or an empty string if none is set.

436    *

437    * <p><table summary="">

438    *   <tr><th>URL</th><th>{@code encodedPassword()}</th></tr>

439    *   <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>

440    *   <tr><td>{@code http://username@host/}</td><td>{@code ""}</td></tr>

441    *   <tr><td>{@code http://username:password@host/}</td><td>{@code "password"}</td></tr>

442    *   <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "c%20d"}</td></tr>

443    * </table>

444    */

445   public String encodedPassword() {

446     if (password.isEmpty()) return "";

447     int passwordStart = url.indexOf(':', scheme.length() + 3) + 1;

448     int passwordEnd = url.indexOf('@');

449     return url.substring(passwordStart, passwordEnd);

450   }

451 

452   /**

453    * Returns the decoded password, or an empty string if none is present.

454    *

455    * <p><table summary="">

456    *   <tr><th>URL</th><th>{@code password()}</th></tr>

457    *   <tr><td>{@code http://host/}</td><td>{@code ""}</td></tr>

458    *   <tr><td>{@code http://username@host/}</td><td>{@code ""}</td></tr>

459    *   <tr><td>{@code http://username:password@host/}</td><td>{@code "password"}</td></tr>

460    *   <tr><td>{@code http://a%20b:c%20d@host/}</td><td>{@code "c d"}</td></tr>

461    * </table>

462    */

463   public String password() {

464     return password;

465   }

466 

467   /**

468    * Returns the host address suitable for use with {@link InetAddress#getAllByName(String)}. May

469    * be:

470    *

471    * <ul>

472    *   <li>A regular host name, like {@code android.com}.

473    *   <li>An IPv4 address, like {@code 127.0.0.1}.

474    *   <li>An IPv6 address, like {@code ::1}. Note that there are no square braces.

475    *   <li>An encoded IDN, like {@code xn--n3h.net}.

476    * </ul>

477    *

478    * <p><table summary="">

479    *   <tr><th>URL</th><th>{@code host()}</th></tr>

480    *   <tr><td>{@code http://android.com/}</td><td>{@code "android.com"}</td></tr>

481    *   <tr><td>{@code http://127.0.0.1/}</td><td>{@code "127.0.0.1"}</td></tr>

482    *   <tr><td>{@code http://[::1]/}</td><td>{@code "::1"}</td></tr>

483    *   <tr><td>{@code http://xn--n3h.net/}</td><td>{@code "xn--n3h.net"}</td></tr>

484    * </table>

485    */

486   public String host() {

487     return host;

488   }

489 

490   /**

491    * Returns the explicitly-specified port if one was provided, or the default port for this URL's

492    * scheme. For example, this returns 8443 for {@code https://square.com:8443/} and 443 for {@code

493    * https://square.com/}. The result is in {@code [1..65535]}.

494    *

495    * <p><table summary="">

496    *   <tr><th>URL</th><th>{@code port()}</th></tr>

497    *   <tr><td>{@code http://host/}</td><td>{@code 80}</td></tr>

498    *   <tr><td>{@code http://host:8000/}</td><td>{@code 8000}</td></tr>

499    *   <tr><td>{@code https://host/}</td><td>{@code 443}</td></tr>

500    * </table>

501    */

502   public int port() {

503     return port;

504   }

505 

506   /**

507    * Returns 80 if {@code scheme.equals("http")}, 443 if {@code scheme.equals("https")} and -1

508    * otherwise.

509    */

510   public static int defaultPort(String scheme) {

511     if (scheme.equals("http")) {

512       return 80;

513     } else if (scheme.equals("https")) {

514       return 443;

515     } else {

516       return -1;

517     }

518   }

519 

520   /**

521    * Returns the number of segments in this URL's path. This is also the number of slashes in the

522    * URL's path, like 3 in {@code http://host/a/b/c}. This is always at least 1.

523    *

524    * <p><table summary="">

525    *   <tr><th>URL</th><th>{@code pathSize()}</th></tr>

526    *   <tr><td>{@code http://host/}</td><td>{@code 1}</td></tr>

527    *   <tr><td>{@code http://host/a/b/c}</td><td>{@code 3}</td></tr>

528    *   <tr><td>{@code http://host/a/b/c/}</td><td>{@code 4}</td></tr>

529    * </table>

530    */

531   public int pathSize() {

532     return pathSegments.size();

533   }

534 

535   /**

536    * Returns the entire path of this URL encoded for use in HTTP resource resolution. The returned

537    * path will start with {@code "/"}.

538    *

539    * <p><table summary="">

540    *   <tr><th>URL</th><th>{@code encodedPath()}</th></tr>

541    *   <tr><td>{@code http://host/}</td><td>{@code "/"}</td></tr>

542    *   <tr><td>{@code http://host/a/b/c}</td><td>{@code "/a/b/c"}</td></tr>

543    *   <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code "/a/b%20c/d"}</td></tr>

544    * </table>

545    */

546   public String encodedPath() {

547     int pathStart = url.indexOf('/', scheme.length() + 3); // "://".length() == 3.

548     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");

549     return url.substring(pathStart, pathEnd);

550   }

551 

552   static void pathSegmentsToString(StringBuilder out, List<String> pathSegments) {

553     for (int i = 0, size = pathSegments.size(); i < size; i++) {

554       out.append('/');

555       out.append(pathSegments.get(i));

556     }

557   }

558 

559   /**

560    * Returns a list of encoded path segments like {@code ["a", "b", "c"]} for the URL {@code

561    * http://host/a/b/c}. This list is never empty though it may contain a single empty string.

562    *

563    * <p><table summary="">

564    *   <tr><th>URL</th><th>{@code encodedPathSegments()}</th></tr>

565    *   <tr><td>{@code http://host/}</td><td>{@code [""]}</td></tr>

566    *   <tr><td>{@code http://host/a/b/c}</td><td>{@code ["a", "b", "c"]}</td></tr>

567    *   <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code ["a", "b%20c", "d"]}</td></tr>

568    * </table>

569    */

570   public List<String> encodedPathSegments() {

571     int pathStart = url.indexOf('/', scheme.length() + 3);

572     int pathEnd = delimiterOffset(url, pathStart, url.length(), "?#");

573     List<String> result = new ArrayList<>();

574     for (int i = pathStart; i < pathEnd; ) {

575       i++; // Skip the '/'.

576       int segmentEnd = delimiterOffset(url, i, pathEnd, '/');

577       result.add(url.substring(i, segmentEnd));

578       i = segmentEnd;

579     }

580     return result;

581   }

582 

583   /**

584    * Returns a list of path segments like {@code ["a", "b", "c"]} for the URL {@code

585    * http://host/a/b/c}. This list is never empty though it may contain a single empty string.

586    *

587    * <p><table summary="">

588    *   <tr><th>URL</th><th>{@code pathSegments()}</th></tr>

589    *   <tr><td>{@code http://host/}</td><td>{@code [""]}</td></tr>

590    *   <tr><td>{@code http://host/a/b/c}</td><td>{@code ["a", "b", "c"]}</td></tr>

591    *   <tr><td>{@code http://host/a/b%20c/d}</td><td>{@code ["a", "b c", "d"]}</td></tr>

592    * </table>

593    */

594   public List<String> pathSegments() {

595     return pathSegments;

596   }

597 

598   /**

599    * Returns the query of this URL, encoded for use in HTTP resource resolution. The returned string

600    * may be null (for URLs with no query), empty (for URLs with an empty query) or non-empty (all

601    * other URLs).

602    *

603    * <p><table summary="">

604    *   <tr><th>URL</th><th>{@code encodedQuery()}</th></tr>

605    *   <tr><td>{@code http://host/}</td><td>null</td></tr>

606    *   <tr><td>{@code http://host/?}</td><td>{@code ""}</td></tr>

607    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code

608    *       "a=apple&k=key+lime"}</td></tr>

609    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a=apple&a=apricot"}</td></tr>

610    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a=apple&b"}</td></tr>

611    * </table>

612    */

613   public @Nullable String encodedQuery() {

614     if (queryNamesAndValues == null) return null; // No query.

615     int queryStart = url.indexOf('?') + 1;

616     int queryEnd = delimiterOffset(url, queryStart, url.length(), '#');

617     return url.substring(queryStart, queryEnd);

618   }

619 

620   static void namesAndValuesToQueryString(StringBuilder out, List<String> namesAndValues) {

621     for (int i = 0, size = namesAndValues.size(); i < size; i += 2) {

622       String name = namesAndValues.get(i);

623       String value = namesAndValues.get(i + 1);

624       if (i > 0) out.append('&');

625       out.append(name);

626       if (value != null) {

627         out.append('=');

628         out.append(value);

629       }

630     }

631   }

632 

633   /**

634    * Cuts {@code encodedQuery} up into alternating parameter names and values. This divides a query

635    * string like {@code subject=math&easy&problem=5-2=3} into the list {@code ["subject", "math",

636    * "easy", null, "problem", "5-2=3"]}. Note that values may be null and may contain '='

637    * characters.

638    */

639   static List<String> queryStringToNamesAndValues(String encodedQuery) {

640     List<String> result = new ArrayList<>();

641     for (int pos = 0; pos <= encodedQuery.length(); ) {

642       int ampersandOffset = encodedQuery.indexOf('&', pos);

643       if (ampersandOffset == -1) ampersandOffset = encodedQuery.length();

644 

645       int equalsOffset = encodedQuery.indexOf('=', pos);

646       if (equalsOffset == -1 || equalsOffset > ampersandOffset) {

647         result.add(encodedQuery.substring(pos, ampersandOffset));

648         result.add(null); // No value for this name.

649       } else {

650         result.add(encodedQuery.substring(pos, equalsOffset));

651         result.add(encodedQuery.substring(equalsOffset + 1, ampersandOffset));

652       }

653       pos = ampersandOffset + 1;

654     }

655     return result;

656   }

657 

658   /**

659    * Returns this URL's query, like {@code "abc"} for {@code http://host/?abc}. Most callers should

660    * prefer {@link #queryParameterName} and {@link #queryParameterValue} because these methods offer

661    * direct access to individual query parameters.

662    *

663    * <p><table summary="">

664    *   <tr><th>URL</th><th>{@code query()}</th></tr>

665    *   <tr><td>{@code http://host/}</td><td>null</td></tr>

666    *   <tr><td>{@code http://host/?}</td><td>{@code ""}</td></tr>

667    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "a=apple&k=key

668    *       lime"}</td></tr>

669    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a=apple&a=apricot"}</td></tr>

670    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a=apple&b"}</td></tr>

671    * </table>

672    */

673   public @Nullable String query() {

674     if (queryNamesAndValues == null) return null; // No query.

675     StringBuilder result = new StringBuilder();

676     namesAndValuesToQueryString(result, queryNamesAndValues);

677     return result.toString();

678   }

679 

680   /**

681    * Returns the number of query parameters in this URL, like 2 for {@code

682    * http://host/?a=apple&b=banana}. If this URL has no query this returns 0. Otherwise it returns

683    * one more than the number of {@code "&"} separators in the query.

684    *

685    * <p><table summary="">

686    *   <tr><th>URL</th><th>{@code querySize()}</th></tr>

687    *   <tr><td>{@code http://host/}</td><td>{@code 0}</td></tr>

688    *   <tr><td>{@code http://host/?}</td><td>{@code 1}</td></tr>

689    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code 2}</td></tr>

690    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code 2}</td></tr>

691    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code 2}</td></tr>

692    * </table>

693    */

694   public int querySize() {

695     return queryNamesAndValues != null ? queryNamesAndValues.size() / 2 : 0;

696   }

697 

698   /**

699    * Returns the first query parameter named {@code name} decoded using UTF-8, or null if there is

700    * no such query parameter.

701    *

702    * <p><table summary="">

703    *   <tr><th>URL</th><th>{@code queryParameter("a")}</th></tr>

704    *   <tr><td>{@code http://host/}</td><td>null</td></tr>

705    *   <tr><td>{@code http://host/?}</td><td>null</td></tr>

706    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "apple"}</td></tr>

707    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "apple"}</td></tr>

708    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "apple"}</td></tr>

709    * </table>

710    */

711   public @Nullable String queryParameter(String name) {

712     if (queryNamesAndValues == null) return null;

713     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {

714       if (name.equals(queryNamesAndValues.get(i))) {

715         return queryNamesAndValues.get(i + 1);

716       }

717     }

718     return null;

719   }

720 

721   /**

722    * Returns the distinct query parameter names in this URL, like {@code ["a", "b"]} for {@code

723    * http://host/?a=apple&b=banana}. If this URL has no query this returns the empty set.

724    *

725    * <p><table summary="">

726    *   <tr><th>URL</th><th>{@code queryParameterNames()}</th></tr>

727    *   <tr><td>{@code http://host/}</td><td>{@code []}</td></tr>

728    *   <tr><td>{@code http://host/?}</td><td>{@code [""]}</td></tr>

729    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code ["a", "k"]}</td></tr>

730    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code ["a"]}</td></tr>

731    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code ["a", "b"]}</td></tr>

732    * </table>

733    */

734   public Set<String> queryParameterNames() {

735     if (queryNamesAndValues == null) return Collections.emptySet();

736     Set<String> result = new LinkedHashSet<>();

737     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {

738       result.add(queryNamesAndValues.get(i));

739     }

740     return Collections.unmodifiableSet(result);

741   }

742 

743   /**

744    * Returns all values for the query parameter {@code name} ordered by their appearance in this

745    * URL. For example this returns {@code ["banana"]} for {@code queryParameterValue("b")} on {@code

746    * http://host/?a=apple&b=banana}.

747    *

748    * <p><table summary="">

749    *   <tr><th>URL</th><th>{@code queryParameterValues("a")}</th><th>{@code

750    *       queryParameterValues("b")}</th></tr>

751    *   <tr><td>{@code http://host/}</td><td>{@code []}</td><td>{@code []}</td></tr>

752    *   <tr><td>{@code http://host/?}</td><td>{@code []}</td><td>{@code []}</td></tr>

753    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code ["apple"]}</td><td>{@code

754    *       []}</td></tr>

755    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code ["apple",

756    *       "apricot"]}</td><td>{@code []}</td></tr>

757    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code ["apple"]}</td><td>{@code

758    *       [null]}</td></tr>

759    * </table>

760    */

761   public List<String> queryParameterValues(String name) {

762     if (queryNamesAndValues == null) return Collections.emptyList();

763     List<String> result = new ArrayList<>();

764     for (int i = 0, size = queryNamesAndValues.size(); i < size; i += 2) {

765       if (name.equals(queryNamesAndValues.get(i))) {

766         result.add(queryNamesAndValues.get(i + 1));

767       }

768     }

769     return Collections.unmodifiableList(result);

770   }

771 

772   /**

773    * Returns the name of the query parameter at {@code index}. For example this returns {@code "a"}

774    * for {@code queryParameterName(0)} on {@code http://host/?a=apple&b=banana}. This throws if

775    * {@code index} is not less than the {@linkplain #querySize query size}.

776    *

777    * <p><table summary="">

778    *   <tr><th>URL</th><th>{@code queryParameterName(0)}</th><th>{@code

779    *       queryParameterName(1)}</th></tr>

780    *   <tr><td>{@code http://host/}</td><td>exception</td><td>exception</td></tr>

781    *   <tr><td>{@code http://host/?}</td><td>{@code ""}</td><td>exception</td></tr>

782    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "a"}</td><td>{@code

783    *       "k"}</td></tr>

784    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "a"}</td><td>{@code

785    *       "a"}</td></tr>

786    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "a"}</td><td>{@code "b"}</td></tr>

787    * </table>

788    */

789   public String queryParameterName(int index) {

790     if (queryNamesAndValues == null) throw new IndexOutOfBoundsException();

791     return queryNamesAndValues.get(index * 2);

792   }

793 

794   /**

795    * Returns the value of the query parameter at {@code index}. For example this returns {@code

796    * "apple"} for {@code queryParameterName(0)} on {@code http://host/?a=apple&b=banana}. This

797    * throws if {@code index} is not less than the {@linkplain #querySize query size}.

798    *

799    * <p><table summary="">

800    *   <tr><th>URL</th><th>{@code queryParameterValue(0)}</th><th>{@code

801    *       queryParameterValue(1)}</th></tr>

802    *   <tr><td>{@code http://host/}</td><td>exception</td><td>exception</td></tr>

803    *   <tr><td>{@code http://host/?}</td><td>null</td><td>exception</td></tr>

804    *   <tr><td>{@code http://host/?a=apple&k=key+lime}</td><td>{@code "apple"}</td><td>{@code

805    *       "key lime"}</td></tr>

806    *   <tr><td>{@code http://host/?a=apple&a=apricot}</td><td>{@code "apple"}</td><td>{@code

807    *       "apricot"}</td></tr>

808    *   <tr><td>{@code http://host/?a=apple&b}</td><td>{@code "apple"}</td><td>null</td></tr>

809    * </table>

810    */

811   public String queryParameterValue(int index) {

812     if (queryNamesAndValues == null) throw new IndexOutOfBoundsException();

813     return queryNamesAndValues.get(index * 2 + 1);

814   }

815 

816   /**

817    * Returns this URL's encoded fragment, like {@code "abc"} for {@code http://host/#abc}. This

818    * returns null if the URL has no fragment.

819    *

820    * <p><table summary="">

821    *   <tr><th>URL</th><th>{@code encodedFragment()}</th></tr>

822    *   <tr><td>{@code http://host/}</td><td>null</td></tr>

823    *   <tr><td>{@code http://host/#}</td><td>{@code ""}</td></tr>

824    *   <tr><td>{@code http://host/#abc}</td><td>{@code "abc"}</td></tr>

825    *   <tr><td>{@code http://host/#abc|def}</td><td>{@code "abc|def"}</td></tr>

826    * </table>

827    */

828   public @Nullable String encodedFragment() {

829     if (fragment == null) return null;

830     int fragmentStart = url.indexOf('#') + 1;

831     return url.substring(fragmentStart);

832   }

833 

834   /**

835    * Returns this URL's fragment, like {@code "abc"} for {@code http://host/#abc}. This returns null

836    * if the URL has no fragment.

837    *

838    * <p><table summary="">

839    *   <tr><th>URL</th><th>{@code fragment()}</th></tr>

840    *   <tr><td>{@code http://host/}</td><td>null</td></tr>

841    *   <tr><td>{@code http://host/#}</td><td>{@code ""}</td></tr>

842    *   <tr><td>{@code http://host/#abc}</td><td>{@code "abc"}</td></tr>

843    *   <tr><td>{@code http://host/#abc|def}</td><td>{@code "abc|def"}</td></tr>

844    * </table>

845    */

846   public @Nullable String fragment() {

847     return fragment;

848   }

849 

850   /**

851    * Returns a string with containing this URL with its username, password, query, and fragment

852    * stripped, and its path replaced with {@code /...}. For example, redacting {@code

853    * http://username:password@example.com/path} returns {@code http://example.com/...}.

854    */

855   public String redact() {

856     return newBuilder("/...")

857         .username("")

858         .password("")

859         .build()

860         .toString();

861   }

862 

863   /**

864    * Returns the URL that would be retrieved by following {@code link} from this URL, or null if

865    * the resulting URL is not well-formed.

866    */

867   public @Nullable HttpUrl resolve(String link) {

868     Builder builder = newBuilder(link);

869     return builder != null ? builder.build() : null;

870   }

871 

872   public Builder newBuilder() {

873     Builder result = new Builder();

874     result.scheme = scheme;

875     result.encodedUsername = encodedUsername();

876     result.encodedPassword = encodedPassword();

877     result.host = host;

878     // If we're set to a default port, unset it in case of a scheme change.

879     result.port = port != defaultPort(scheme) ? port : -1;

880     result.encodedPathSegments.clear();

881     result.encodedPathSegments.addAll(encodedPathSegments());

882     result.encodedQuery(encodedQuery());

883     result.encodedFragment = encodedFragment();

884     return result;

885   }

886 

887   /**

888    * Returns a builder for the URL that would be retrieved by following {@code link} from this URL,

889    * or null if the resulting URL is not well-formed.

890    */

891   public @Nullable Builder newBuilder(String link) {

892     try {

893       return new Builder().parse(this, link);

894     } catch (IllegalArgumentException ignored) {

895       return null;

896     }

897   }

898 

899   /**

900    * Returns a new {@code HttpUrl} representing {@code url} if it is a well-formed HTTP or HTTPS

901    * URL, or null if it isn't.

902    */

903   public static @Nullable HttpUrl parse(String url) {

904     try {

905       return get(url);

906     } catch (IllegalArgumentException ignored) {

907       return null;

908     }

909   }

910 

911   /**

912    * Returns a new {@code HttpUrl} representing {@code url}.

913    *

914    * @throws IllegalArgumentException If {@code url} is not a well-formed HTTP or HTTPS URL.

915    */

916   public static HttpUrl get(String url) {

917     return new Builder().parse(null, url).build();

918   }

919 

920   /**

921    * Returns an {@link HttpUrl} for {@code url} if its protocol is {@code http} or {@code https}, or

922    * null if it has any other protocol.

923    */

924   public static @Nullable HttpUrl get(URL url) {

925     return parse(url.toString());

926   }

927 

928   public static @Nullable HttpUrl get(URI uri) {

929     return parse(uri.toString());

930   }

931 

932   @Override public boolean equals(@Nullable Object other) {

933     return other instanceof HttpUrl && ((HttpUrl) other).url.equals(url);

934   }

935 

936   @Override public int hashCode() {

937     return url.hashCode();

938   }

939 

940   @Override public String toString() {

941     return url;

942   }

943 

944   /**

945    * Returns the domain name of this URL's {@link #host()} that is one level beneath the public

946    * suffix by consulting the <a href="https://publicsuffix.org">public suffix list</a>. Returns

947    * null if this URL's {@link #host()} is an IP address or is considered a public suffix by the

948    * public suffix list.

949    *

950    * <p>In general this method <strong>should not</strong> be used to test whether a domain is valid

951    * or routable. Instead, DNS is the recommended source for that information.

952    *

953    * <p><table summary="">

954    *   <tr><th>URL</th><th>{@code topPrivateDomain()}</th></tr>

955    *   <tr><td>{@code http://google.com}</td><td>{@code "google.com"}</td></tr>

956    *   <tr><td>{@code http://adwords.google.co.uk}</td><td>{@code "google.co.uk"}</td></tr>

957    *   <tr><td>{@code http://square}</td><td>null</td></tr>

958    *   <tr><td>{@code http://co.uk}</td><td>null</td></tr>

959    *   <tr><td>{@code http://localhost}</td><td>null</td></tr>

960    *   <tr><td>{@code http://127.0.0.1}</td><td>null</td></tr>

961    * </table>

962    */

963   public @Nullable String topPrivateDomain() {

964     if (verifyAsIpAddress(host)) return null;

965     return PublicSuffixDatabase.get().getEffectiveTldPlusOne(host);

966   }

967 

968   public static final class Builder {

969     @Nullable String scheme;

970     String encodedUsername = "";

971     String encodedPassword = "";

972     @Nullable String host;

973     int port = -1;

974     final List<String> encodedPathSegments = new ArrayList<>();

975     @Nullable List<String> encodedQueryNamesAndValues;

976     @Nullable String encodedFragment;

977 

/** Creates a builder whose path starts out as the default {@code /}. */
public Builder() {
  // The default path is '/', which is stored as a single empty path segment.
  this.encodedPathSegments.add("");
}

981 

982     public Builder scheme(String scheme) {

983       if (scheme == null) {

984         throw new NullPointerException("scheme == null");

985       } else if (scheme.equalsIgnoreCase("http")) {

986         this.scheme = "http";

987       } else if (scheme.equalsIgnoreCase("https")) {

988         this.scheme = "https";

989       } else {

990         throw new IllegalArgumentException("unexpected scheme: " + scheme);

991       }

992       return this;

993     }

994 

995     public Builder username(String username) {

996       if (username == null) throw new NullPointerException("username == null");

997       this.encodedUsername = canonicalize(username, USERNAME_ENCODE_SET, false, false, false, true);

998       return this;

999     }

1000 

1001     public Builder encodedUsername(String encodedUsername) {

1002       if (encodedUsername == null) throw new NullPointerException("encodedUsername == null");

1003       this.encodedUsername = canonicalize(

1004           encodedUsername, USERNAME_ENCODE_SET, true, false, false, true);

1005       return this;

1006     }

1007 

1008     public Builder password(String password) {

1009       if (password == null) throw new NullPointerException("password == null");

1010       this.encodedPassword = canonicalize(password, PASSWORD_ENCODE_SET, false, false, false, true);

1011       return this;

1012     }

1013 

1014     public Builder encodedPassword(String encodedPassword) {

1015       if (encodedPassword == null) throw new NullPointerException("encodedPassword == null");

1016       this.encodedPassword = canonicalize(

1017           encodedPassword, PASSWORD_ENCODE_SET, true, false, false, true);

1018       return this;

1019     }

1020 

1021     /**

1022      * @param host either a regular hostname, International Domain Name, IPv4 address, or IPv6

1023      * address.

1024      */

1025     public Builder host(String host) {

1026       if (host == null) throw new NullPointerException("host == null");

1027       String encoded = canonicalizeHost(host, 0, host.length());

1028       if (encoded == null) throw new IllegalArgumentException("unexpected host: " + host);

1029       this.host = encoded;

1030       return this;

1031     }

1032 

1033     public Builder port(int port) {

1034       if (port <= 0 || port > 65535) throw new IllegalArgumentException("unexpected port: " + port);

1035       this.port = port;

1036       return this;

1037     }

1038 

1039     int effectivePort() {

1040       return port != -1 ? port : defaultPort(scheme);

1041     }

1042 

1043     public Builder addPathSegment(String pathSegment) {

1044       if (pathSegment == null) throw new NullPointerException("pathSegment == null");

1045       push(pathSegment, 0, pathSegment.length(), false, false);

1046       return this;

1047     }

1048 

1049     /**

1050      * Adds a set of path segments separated by a slash (either {@code \} or {@code /}). If

1051      * {@code pathSegments} starts with a slash, the resulting URL will have empty path segment.

1052      */

1053     public Builder addPathSegments(String pathSegments) {

1054       if (pathSegments == null) throw new NullPointerException("pathSegments == null");

1055       return addPathSegments(pathSegments, false);

1056     }

1057 

1058     public Builder addEncodedPathSegment(String encodedPathSegment) {

1059       if (encodedPathSegment == null) {

1060         throw new NullPointerException("encodedPathSegment == null");

1061       }

1062       push(encodedPathSegment, 0, encodedPathSegment.length(), false, true);

1063       return this;

1064     }

1065 

1066     /**

1067      * Adds a set of encoded path segments separated by a slash (either {@code \} or {@code /}). If

1068      * {@code encodedPathSegments} starts with a slash, the resulting URL will have empty path

1069      * segment.

1070      */

1071     public Builder addEncodedPathSegments(String encodedPathSegments) {

1072       if (encodedPathSegments == null) {

1073         throw new NullPointerException("encodedPathSegments == null");

1074       }

1075       return addPathSegments(encodedPathSegments, true);

1076     }

1077 

1078     private Builder addPathSegments(String pathSegments, boolean alreadyEncoded) {

1079       int offset = 0;

1080       do {

1081         int segmentEnd = delimiterOffset(pathSegments, offset, pathSegments.length(), "/\\");

1082         boolean addTrailingSlash = segmentEnd < pathSegments.length();

1083         push(pathSegments, offset, segmentEnd, addTrailingSlash, alreadyEncoded);

1084         offset = segmentEnd + 1;

1085       } while (offset <= pathSegments.length());

1086       return this;

1087     }

1088 

1089     public Builder setPathSegment(int index, String pathSegment) {

1090       if (pathSegment == null) throw new NullPointerException("pathSegment == null");

1091       String canonicalPathSegment = canonicalize(pathSegment, 0, pathSegment.length(),

1092           PATH_SEGMENT_ENCODE_SET, false, false, false, true, null);

1093       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {

1094         throw new IllegalArgumentException("unexpected path segment: " + pathSegment);

1095       }

1096       encodedPathSegments.set(index, canonicalPathSegment);

1097       return this;

1098     }

1099 

1100     public Builder setEncodedPathSegment(int index, String encodedPathSegment) {

1101       if (encodedPathSegment == null) {

1102         throw new NullPointerException("encodedPathSegment == null");

1103       }

1104       String canonicalPathSegment = canonicalize(encodedPathSegment, 0, encodedPathSegment.length(),

1105           PATH_SEGMENT_ENCODE_SET, true, false, false, true, null);

1106       encodedPathSegments.set(index, canonicalPathSegment);

1107       if (isDot(canonicalPathSegment) || isDotDot(canonicalPathSegment)) {

1108         throw new IllegalArgumentException("unexpected path segment: " + encodedPathSegment);

1109       }

1110       return this;

1111     }

1112 

1113     public Builder removePathSegment(int index) {

1114       encodedPathSegments.remove(index);

1115       if (encodedPathSegments.isEmpty()) {

1116         encodedPathSegments.add(""); // Always leave at least one '/'.

1117       }

1118       return this;

1119     }

1120 

1121     public Builder encodedPath(String encodedPath) {

1122       if (encodedPath == null) throw new NullPointerException("encodedPath == null");

1123       if (!encodedPath.startsWith("/")) {

1124         throw new IllegalArgumentException("unexpected encodedPath: " + encodedPath);

1125       }

1126       resolvePath(encodedPath, 0, encodedPath.length());

1127       return this;

1128     }

1129 

1130     public Builder query(@Nullable String query) {

1131       this.encodedQueryNamesAndValues = query != null

1132           ? queryStringToNamesAndValues(canonicalize(

1133           query, QUERY_ENCODE_SET, false, false, true, true))

1134           : null;

1135       return this;

1136     }

1137 

1138     public Builder encodedQuery(@Nullable String encodedQuery) {

1139       this.encodedQueryNamesAndValues = encodedQuery != null

1140           ? queryStringToNamesAndValues(

1141           canonicalize(encodedQuery, QUERY_ENCODE_SET, true, false, true, true))

1142           : null;

1143       return this;

1144     }

1145 

1146     /** Encodes the query parameter using UTF-8 and adds it to this URL's query string. */

1147     public Builder addQueryParameter(String name, @Nullable String value) {

1148       if (name == null) throw new NullPointerException("name == null");

1149       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();

1150       encodedQueryNamesAndValues.add(

1151           canonicalize(name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true));

1152       encodedQueryNamesAndValues.add(value != null

1153           ? canonicalize(value, QUERY_COMPONENT_ENCODE_SET, false, false, true, true)

1154           : null);

1155       return this;

1156     }

1157 

1158     /** Adds the pre-encoded query parameter to this URL's query string. */

1159     public Builder addEncodedQueryParameter(String encodedName, @Nullable String encodedValue) {

1160       if (encodedName == null) throw new NullPointerException("encodedName == null");

1161       if (encodedQueryNamesAndValues == null) encodedQueryNamesAndValues = new ArrayList<>();

1162       encodedQueryNamesAndValues.add(

1163           canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true));

1164       encodedQueryNamesAndValues.add(encodedValue != null

1165           ? canonicalize(encodedValue, QUERY_COMPONENT_REENCODE_SET, true, false, true, true)

1166           : null);

1167       return this;

1168     }

1169 

1170     public Builder setQueryParameter(String name, @Nullable String value) {

1171       removeAllQueryParameters(name);

1172       addQueryParameter(name, value);

1173       return this;

1174     }

1175 

1176     public Builder setEncodedQueryParameter(String encodedName, @Nullable String encodedValue) {

1177       removeAllEncodedQueryParameters(encodedName);

1178       addEncodedQueryParameter(encodedName, encodedValue);

1179       return this;

1180     }

1181 

1182     public Builder removeAllQueryParameters(String name) {

1183       if (name == null) throw new NullPointerException("name == null");

1184       if (encodedQueryNamesAndValues == null) return this;

1185       String nameToRemove = canonicalize(

1186           name, QUERY_COMPONENT_ENCODE_SET, false, false, true, true);

1187       removeAllCanonicalQueryParameters(nameToRemove);

1188       return this;

1189     }

1190 

1191     public Builder removeAllEncodedQueryParameters(String encodedName) {

1192       if (encodedName == null) throw new NullPointerException("encodedName == null");

1193       if (encodedQueryNamesAndValues == null) return this;

1194       removeAllCanonicalQueryParameters(

1195           canonicalize(encodedName, QUERY_COMPONENT_REENCODE_SET, true, false, true, true));

1196       return this;

1197     }

1198 

1199     private void removeAllCanonicalQueryParameters(String canonicalName) {

1200       for (int i = encodedQueryNamesAndValues.size() - 2; i >= 0; i -= 2) {

1201         if (canonicalName.equals(encodedQueryNamesAndValues.get(i))) {

1202           encodedQueryNamesAndValues.remove(i + 1);

1203           encodedQueryNamesAndValues.remove(i);

1204           if (encodedQueryNamesAndValues.isEmpty()) {

1205             encodedQueryNamesAndValues = null;

1206             return;

1207           }

1208         }

1209       }

1210     }

1211 

1212     public Builder fragment(@Nullable String fragment) {

1213       this.encodedFragment = fragment != null

1214           ? canonicalize(fragment, FRAGMENT_ENCODE_SET, false, false, false, false)

1215           : null;

1216       return this;

1217     }

1218 

1219     public Builder encodedFragment(@Nullable String encodedFragment) {

1220       this.encodedFragment = encodedFragment != null

1221           ? canonicalize(encodedFragment, FRAGMENT_ENCODE_SET, true, false, false, false)

1222           : null;

1223       return this;

1224     }

1225 

1226     /**

1227      * Re-encodes the components of this URL so that it satisfies (obsolete) RFC 2396, which is

1228      * particularly strict for certain components.

1229      */

1230     Builder reencodeForUri() {

1231       for (int i = 0, size = encodedPathSegments.size(); i < size; i++) {

1232         String pathSegment = encodedPathSegments.get(i);

1233         encodedPathSegments.set(i,

1234             canonicalize(pathSegment, PATH_SEGMENT_ENCODE_SET_URI, true, true, false, true));

1235       }

1236       if (encodedQueryNamesAndValues != null) {

1237         for (int i = 0, size = encodedQueryNamesAndValues.size(); i < size; i++) {

1238           String component = encodedQueryNamesAndValues.get(i);

1239           if (component != null) {

1240             encodedQueryNamesAndValues.set(i,

1241                 canonicalize(component, QUERY_COMPONENT_ENCODE_SET_URI, true, true, true, true));

1242           }

1243         }

1244       }

1245       if (encodedFragment != null) {

1246         encodedFragment = canonicalize(

1247             encodedFragment, FRAGMENT_ENCODE_SET_URI, true, true, false, false);

1248       }

1249       return this;

1250     }

1251 

1252     public HttpUrl build() {

1253       if (scheme == null) throw new IllegalStateException("scheme == null");

1254       if (host == null) throw new IllegalStateException("host == null");

1255       return new HttpUrl(this);

1256     }

1257 

1258     @Override public String toString() {

1259       StringBuilder result = new StringBuilder();

1260       if (scheme != null) {

1261         result.append(scheme);

1262         result.append("://");

1263       } else {

1264         result.append("//");

1265       }

1266 

1267       if (!encodedUsername.isEmpty() || !encodedPassword.isEmpty()) {

1268         result.append(encodedUsername);

1269         if (!encodedPassword.isEmpty()) {

1270           result.append(':');

1271           result.append(encodedPassword);

1272         }

1273         result.append('@');

1274       }

1275 

1276       if (host != null) {

1277         if (host.indexOf(':') != -1) {

1278           // Host is an IPv6 address.

1279           result.append('[');

1280           result.append(host);

1281           result.append(']');

1282         } else {

1283           result.append(host);

1284         }

1285       }

1286 

1287       if (port != -1 || scheme != null) {

1288         int effectivePort = effectivePort();

1289         if (scheme == null || effectivePort != defaultPort(scheme)) {

1290           result.append(':');

1291           result.append(effectivePort);

1292         }

1293       }

1294 

1295       pathSegmentsToString(result, encodedPathSegments);

1296 

1297       if (encodedQueryNamesAndValues != null) {

1298         result.append('?');

1299         namesAndValuesToQueryString(result, encodedQueryNamesAndValues);

1300       }

1301 

1302       if (encodedFragment != null) {

1303         result.append('#');

1304         result.append(encodedFragment);

1305       }

1306 

1307       return result.toString();

1308     }

1309 

1310     static final String INVALID_HOST = "Invalid URL host";

1311 

1312     Builder parse(@Nullable HttpUrl base, String input) {

1313       int pos = skipLeadingAsciiWhitespace(input, 0, input.length());

1314       int limit = skipTrailingAsciiWhitespace(input, pos, input.length());

1315 

1316       // Scheme.

1317       int schemeDelimiterOffset = schemeDelimiterOffset(input, pos, limit);

1318       if (schemeDelimiterOffset != -1) {

1319         if (input.regionMatches(true, pos, "https:", 0, 6)) {

1320           this.scheme = "https";

1321           pos += "https:".length();

1322         } else if (input.regionMatches(true, pos, "http:", 0, 5)) {

1323           this.scheme = "http";

1324           pos += "http:".length();

1325         } else {

1326           throw new IllegalArgumentException("Expected URL scheme 'http' or 'https' but was '"

1327               + input.substring(0, schemeDelimiterOffset) + "'");

1328         }

1329       } else if (base != null) {

1330         this.scheme = base.scheme;

1331       } else {

1332         throw new IllegalArgumentException(

1333             "Expected URL scheme 'http' or 'https' but no colon was found");

1334       }

1335 

1336       // Authority.

1337       boolean hasUsername = false;

1338       boolean hasPassword = false;

1339       int slashCount = slashCount(input, pos, limit);

1340       if (slashCount >= 2 || base == null || !base.scheme.equals(this.scheme)) {

1341         // Read an authority if either:

1342         //  * The input starts with 2 or more slashes. These follow the scheme if it exists.

1343         //  * The input scheme exists and is different from the base URL's scheme.

1344         //

1345         // The structure of an authority is:

1346         //   username:password@host:port

1347         //

1348         // Username, password and port are optional.

1349         //   [username[:password]@]host[:port]

1350         pos += slashCount;

1351         authority:

1352         while (true) {

1353           int componentDelimiterOffset = delimiterOffset(input, pos, limit, "@/\\?#");

1354           int c = componentDelimiterOffset != limit

1355               ? input.charAt(componentDelimiterOffset)

1356               : -1;

1357           switch (c) {

1358             case '@':

1359               // User info precedes.

1360               if (!hasPassword) {

1361                 int passwordColonOffset = delimiterOffset(

1362                     input, pos, componentDelimiterOffset, ':');

1363                 String canonicalUsername = canonicalize(input, pos, passwordColonOffset,

1364                     USERNAME_ENCODE_SET, true, false, false, true, null);

1365                 this.encodedUsername = hasUsername

1366                     ? this.encodedUsername + "%40" + canonicalUsername

1367                     : canonicalUsername;

1368                 if (passwordColonOffset != componentDelimiterOffset) {

1369                   hasPassword = true;

1370                   this.encodedPassword = canonicalize(input, passwordColonOffset + 1,

1371                       componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true,

1372                       null);

1373                 }

1374                 hasUsername = true;

1375               } else {

1376                 this.encodedPassword = this.encodedPassword + "%40" + canonicalize(input, pos,

1377                     componentDelimiterOffset, PASSWORD_ENCODE_SET, true, false, false, true, null);

1378               }

1379               pos = componentDelimiterOffset + 1;

1380               break;

1381 

1382             case -1:

1383             case '/':

1384             case '\\':

1385             case '?':

1386             case '#':

1387               // Host info precedes.

1388               int portColonOffset = portColonOffset(input, pos, componentDelimiterOffset);

1389               if (portColonOffset + 1 < componentDelimiterOffset) {

1390                 host = canonicalizeHost(input, pos, portColonOffset);

1391                 port = parsePort(input, portColonOffset + 1, componentDelimiterOffset);

1392                 if (port == -1) {

1393                   throw new IllegalArgumentException("Invalid URL port: \""

1394                       + input.substring(portColonOffset + 1, componentDelimiterOffset) + '"');

1395                 }

1396               } else {

1397                 host = canonicalizeHost(input, pos, portColonOffset);

1398                 port = defaultPort(scheme);

1399               }

1400               if (host == null) {

1401                 throw new IllegalArgumentException(

1402                     INVALID_HOST + ": \"" + input.substring(pos, portColonOffset) + '"');

1403               }

1404               pos = componentDelimiterOffset;

1405               break authority;

1406           }

1407         }

1408       } else {

1409         // This is a relative link. Copy over all authority components. Also maybe the path & query.

1410         this.encodedUsername = base.encodedUsername();

1411         this.encodedPassword = base.encodedPassword();

1412         this.host = base.host;

1413         this.port = base.port;

1414         this.encodedPathSegments.clear();

1415         this.encodedPathSegments.addAll(base.encodedPathSegments());

1416         if (pos == limit || input.charAt(pos) == '#') {

1417           encodedQuery(base.encodedQuery());

1418         }

1419       }

1420 

1421       // Resolve the relative path.

1422       int pathDelimiterOffset = delimiterOffset(input, pos, limit, "?#");

1423       resolvePath(input, pos, pathDelimiterOffset);

1424       pos = pathDelimiterOffset;

1425 

1426       // Query.

1427       if (pos < limit && input.charAt(pos) == '?') {

1428         int queryDelimiterOffset = delimiterOffset(input, pos, limit, '#');

1429         this.encodedQueryNamesAndValues = queryStringToNamesAndValues(canonicalize(

1430             input, pos + 1, queryDelimiterOffset, QUERY_ENCODE_SET, true, false, true, true, null));

1431         pos = queryDelimiterOffset;

1432       }

1433 

1434       // Fragment.

1435       if (pos < limit && input.charAt(pos) == '#') {

1436         this.encodedFragment = canonicalize(

1437             input, pos + 1, limit, FRAGMENT_ENCODE_SET, true, false, false, false, null);

1438       }

1439 

1440       return this;

1441     }

1442 

1443     private void resolvePath(String input, int pos, int limit) {

1444       // Read a delimiter.

1445       if (pos == limit) {

1446         // Empty path: keep the base path as-is.

1447         return;

1448       }

1449       char c = input.charAt(pos);

1450       if (c == '/' || c == '\\') {

1451         // Absolute path: reset to the default "/".

1452         encodedPathSegments.clear();

1453         encodedPathSegments.add("");

1454         pos++;

1455       } else {

1456         // Relative path: clear everything after the last '/'.

1457         encodedPathSegments.set(encodedPathSegments.size() - 1, "");

1458       }

1459 

1460       // Read path segments.

1461       for (int i = pos; i < limit; ) {

1462         int pathSegmentDelimiterOffset = delimiterOffset(input, i, limit, "/\\");

1463         boolean segmentHasTrailingSlash = pathSegmentDelimiterOffset < limit;

1464         push(input, i, pathSegmentDelimiterOffset, segmentHasTrailingSlash, true);

1465         i = pathSegmentDelimiterOffset;

1466         if (segmentHasTrailingSlash) i++;

1467       }

1468     }

1469 

1470     /** Adds a path segment. If the input is ".." or equivalent, this pops a path segment. */

1471     private void push(String input, int pos, int limit, boolean addTrailingSlash,

1472         boolean alreadyEncoded) {

1473       String segment = canonicalize(

1474           input, pos, limit, PATH_SEGMENT_ENCODE_SET, alreadyEncoded, false, false, true, null);

1475       if (isDot(segment)) {

1476         return; // Skip '.' path segments.

1477       }

1478       if (isDotDot(segment)) {

1479         pop();

1480         return;

1481       }

1482       if (encodedPathSegments.get(encodedPathSegments.size() - 1).isEmpty()) {

1483         encodedPathSegments.set(encodedPathSegments.size() - 1, segment);

1484       } else {

1485         encodedPathSegments.add(segment);

1486       }

1487       if (addTrailingSlash) {

1488         encodedPathSegments.add("");

1489       }

1490     }

1491 

1492     private boolean isDot(String input) {

1493       return input.equals(".") || input.equalsIgnoreCase("%2e");

1494     }

1495 

1496     private boolean isDotDot(String input) {

1497       return input.equals("..")

1498           || input.equalsIgnoreCase("%2e.")

1499           || input.equalsIgnoreCase(".%2e")

1500           || input.equalsIgnoreCase("%2e%2e");

1501     }

1502 

1503     /**

1504      * Removes a path segment. When this method returns the last segment is always "", which means

1505      * the encoded path will have a trailing '/'.

1506      *

1507      * <p>Popping "/a/b/c/" yields "/a/b/". In this case the list of path segments goes from ["a",

1508      * "b", "c", ""] to ["a", "b", ""].

1509      *

1510      * <p>Popping "/a/b/c" also yields "/a/b/". The list of path segments goes from ["a", "b", "c"]

1511      * to ["a", "b", ""].

1512      */

1513     private void pop() {

1514       String removed = encodedPathSegments.remove(encodedPathSegments.size() - 1);

1515 

1516       // Make sure the path ends with a '/' by either adding an empty string or clearing a segment.

1517       if (removed.isEmpty() && !encodedPathSegments.isEmpty()) {

1518         encodedPathSegments.set(encodedPathSegments.size() - 1, "");

1519       } else {

1520         encodedPathSegments.add("");

1521       }

1522     }

1523 

1524     /**

1525      * Returns the index of the ':' in {@code input} that is after scheme characters. Returns -1 if

1526      * {@code input} does not have a scheme that starts at {@code pos}.

1527      */

1528     private static int schemeDelimiterOffset(String input, int pos, int limit) {

1529       if (limit - pos < 2) return -1;

1530 

1531       char c0 = input.charAt(pos);

1532       if ((c0 < 'a' || c0 > 'z') && (c0 < 'A' || c0 > 'Z')) return -1; // Not a scheme start char.

1533 

1534       for (int i = pos + 1; i < limit; i++) {

1535         char c = input.charAt(i);

1536 

1537         if ((c >= 'a' && c <= 'z')

1538             || (c >= 'A' && c <= 'Z')

1539             || (c >= '0' && c <= '9')

1540             || c == '+'

1541             || c == '-'

1542             || c == '.') {

1543           continue; // Scheme character. Keep going.

1544         } else if (c == ':') {

1545           return i; // Scheme prefix!

1546         } else {

1547           return -1; // Non-scheme character before the first ':'.

1548         }

1549       }

1550 

1551       return -1; // No ':'; doesn't start with a scheme.

1552     }

1553 

1554     /** Returns the number of '/' and '\' slashes in {@code input}, starting at {@code pos}. */

1555     private static int slashCount(String input, int pos, int limit) {

1556       int slashCount = 0;

1557       while (pos < limit) {

1558         char c = input.charAt(pos);

1559         if (c == '\\' || c == '/') {

1560           slashCount++;

1561           pos++;

1562         } else {

1563           break;

1564         }

1565       }

1566       return slashCount;

1567     }

1568 

1569     /** Finds the first ':' in {@code input}, skipping characters between square braces "[...]". */

1570     private static int portColonOffset(String input, int pos, int limit) {

1571       for (int i = pos; i < limit; i++) {

1572         switch (input.charAt(i)) {

1573           case '[':

1574             while (++i < limit) {

1575               if (input.charAt(i) == ']') break;

1576             }

1577             break;

1578           case ':':

1579             return i;

1580         }

1581       }

1582       return limit; // No colon.

1583     }

1584 

1585     private static @Nullable String canonicalizeHost(String input, int pos, int limit) {

1586       // Start by percent decoding the host. The WHATWG spec suggests doing this only after we've

1587       // checked for IPv6 square braces. But Chrome does it first, and that's more lenient.

1588       String percentDecoded = percentDecode(input, pos, limit, false);

1589       return Util.canonicalizeHost(percentDecoded);

1590     }

1591 

1592     private static int parsePort(String input, int pos, int limit) {

1593       try {

1594         // Canonicalize the port string to skip '\n' etc.

1595         String portString = canonicalize(input, pos, limit, "", false, false, false, true, null);

1596         int i = Integer.parseInt(portString);

1597         if (i > 0 && i <= 65535) return i;

1598         return -1;

1599       } catch (NumberFormatException e) {

1600         return -1; // Invalid port.

1601       }

1602     }

1603   }

1604 

1605   static String percentDecode(String encoded, boolean plusIsSpace) {

1606     return percentDecode(encoded, 0, encoded.length(), plusIsSpace);

1607   }

1608 

1609   private List<String> percentDecode(List<String> list, boolean plusIsSpace) {

1610     int size = list.size();

1611     List<String> result = new ArrayList<>(size);

1612     for (int i = 0; i < size; i++) {

1613       String s = list.get(i);

1614       result.add(s != null ? percentDecode(s, plusIsSpace) : null);

1615     }

1616     return Collections.unmodifiableList(result);

1617   }

1618 

1619   static String percentDecode(String encoded, int pos, int limit, boolean plusIsSpace) {

1620     for (int i = pos; i < limit; i++) {

1621       char c = encoded.charAt(i);

1622       if (c == '%' || (c == '+' && plusIsSpace)) {

1623         // Slow path: the character at i requires decoding!

1624         Buffer out = new Buffer();

1625         out.writeUtf8(encoded, pos, i);

1626         percentDecode(out, encoded, i, limit, plusIsSpace);

1627         return out.readUtf8();

1628       }

1629     }

1630 

1631     // Fast path: no characters in [pos..limit) required decoding.

1632     return encoded.substring(pos, limit);

1633   }

1634 

1635   static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {

1636     int codePoint;

1637     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {

1638       codePoint = encoded.codePointAt(i);

1639       if (codePoint == '%' && i + 2 < limit) {

1640         int d1 = decodeHexDigit(encoded.charAt(i + 1));

1641         int d2 = decodeHexDigit(encoded.charAt(i + 2));

1642         if (d1 != -1 && d2 != -1) {

1643           out.writeByte((d1 << 4) + d2);

1644           i += 2;

1645           continue;

1646         }

1647       } else if (codePoint == '+' && plusIsSpace) {

1648         out.writeByte(' ');

1649         continue;

1650       }

1651       out.writeUtf8CodePoint(codePoint);

1652     }

1653   }

1654 

1655   static boolean percentEncoded(String encoded, int pos, int limit) {

1656     return pos + 2 < limit

1657         && encoded.charAt(pos) == '%'

1658         && decodeHexDigit(encoded.charAt(pos + 1)) != -1

1659         && decodeHexDigit(encoded.charAt(pos + 2)) != -1;

1660   }

1661 

1662   /**

1663    * Returns a substring of {@code input} on the range {@code [pos..limit)} with the following

1664    * transformations:

1665    * <ul>

1666    *   <li>Tabs, newlines, form feeds and carriage returns are skipped.

1667    *   <li>In queries, ' ' is encoded to '+' and '+' is encoded to "%2B".

1668    *   <li>Characters in {@code encodeSet} are percent-encoded.

1669    *   <li>Control characters and non-ASCII characters are percent-encoded.

1670    *   <li>All other characters are copied without transformation.

1671    * </ul>

1672    *

1673    * @param alreadyEncoded true to leave '%' as-is; false to convert it to '%25'.

1674    * @param strict true to encode '%' if it is not the prefix of a valid percent encoding.

1675    * @param plusIsSpace true to encode '+' as "%2B" if it is not already encoded.

1676    * @param asciiOnly true to encode all non-ASCII codepoints.

1677    * @param charset which charset to use, null equals UTF-8.

1678    */

1679   static String canonicalize(String input, int pos, int limit, String encodeSet,

1680       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,

1681       @Nullable Charset charset) {

1682     int codePoint;

1683     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {

1684       codePoint = input.codePointAt(i);

1685       if (codePoint < 0x20

1686           || codePoint == 0x7f

1687           || codePoint >= 0x80 && asciiOnly

1688           || encodeSet.indexOf(codePoint) != -1

1689           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))

1690           || codePoint == '+' && plusIsSpace) {

1691         // Slow path: the character at i requires encoding!

1692         Buffer out = new Buffer();

1693         out.writeUtf8(input, pos, i);

1694         canonicalize(out, input, i, limit, encodeSet, alreadyEncoded, strict, plusIsSpace,

1695             asciiOnly, charset);

1696         return out.readUtf8();

1697       }

1698     }

1699 

1700     // Fast path: no characters in [pos..limit) required encoding.

1701     return input.substring(pos, limit);

1702   }

1703 

1704   static void canonicalize(Buffer out, String input, int pos, int limit, String encodeSet,

1705       boolean alreadyEncoded, boolean strict, boolean plusIsSpace, boolean asciiOnly,

1706       @Nullable Charset charset) {

1707     Buffer encodedCharBuffer = null; // Lazily allocated.

1708     int codePoint;

1709     for (int i = pos; i < limit; i += Character.charCount(codePoint)) {

1710       codePoint = input.codePointAt(i);

1711       if (alreadyEncoded

1712           && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) {

1713         // Skip this character.

1714       } else if (codePoint == '+' && plusIsSpace) {

1715         // Encode '+' as '%2B' since we permit ' ' to be encoded as either '+' or '%20'.

1716         out.writeUtf8(alreadyEncoded ? "+" : "%2B");

1717       } else if (codePoint < 0x20

1718           || codePoint == 0x7f

1719           || codePoint >= 0x80 && asciiOnly

1720           || encodeSet.indexOf(codePoint) != -1

1721           || codePoint == '%' && (!alreadyEncoded || strict && !percentEncoded(input, i, limit))) {

1722         // Percent encode this character.

1723         if (encodedCharBuffer == null) {

1724           encodedCharBuffer = new Buffer();

1725         }

1726 

1727         if (charset == null || charset.equals(UTF_8)) {

1728           encodedCharBuffer.writeUtf8CodePoint(codePoint);

1729         } else {

1730           encodedCharBuffer.writeString(input, i, i + Character.charCount(codePoint), charset);

1731         }

1732 

1733         while (!encodedCharBuffer.exhausted()) {

1734           int b = encodedCharBuffer.readByte() & 0xff;

1735           out.writeByte('%');

1736           out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]);

1737           out.writeByte(HEX_DIGITS[b & 0xf]);

1738         }

1739       } else {

1740         // This character doesn't need encoding. Just copy it over.

1741         out.writeUtf8CodePoint(codePoint);

1742       }

1743     }

1744   }

1745 

1746   static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,

1747       boolean plusIsSpace, boolean asciiOnly, @Nullable Charset charset) {

1748     return canonicalize(input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace,

1749         asciiOnly, charset);

1750   }

1751 

1752   static String canonicalize(String input, String encodeSet, boolean alreadyEncoded, boolean strict,

1753       boolean plusIsSpace, boolean asciiOnly) {

1754    return canonicalize(

1755         input, 0, input.length(), encodeSet, alreadyEncoded, strict, plusIsSpace, asciiOnly, null);

1756   }

1757 }

1758