1 /*
2
3 Licensed to the Apache Software Foundation (ASF) under one or more
4 contributor license agreements. See the NOTICE file distributed with
5 this work for additional information regarding copyright ownership.
6 The ASF licenses this file to You under the Apache License, Version 2.0
7 (the "License"); you may not use this file except in compliance with
8 the License. You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17
18 */
19 package org.apache.batik.util;
20
21 import java.net.MalformedURLException;
22 import java.net.URL;
23
24
25 /**
26 * The default protocol handler this handles the most common
27 * protocols, such as 'file' 'http' 'ftp'.
28 * The parsing should be general enought to support most
29 * 'normal' URL formats, so in many cases
30 *
31 * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
32 * @version $Id: ParsedURLDefaultProtocolHandler.java 1733416 2016-03-03 07:07:13Z gadams $
33 */
34 public class ParsedURLDefaultProtocolHandler
35 extends AbstractParsedURLProtocolHandler {
36
37 /**
38 * Default constructor sets no protocol so this becomes
39 * default handler.
40 */
41 public ParsedURLDefaultProtocolHandler() {
42 super(null);
43 }
44
45 /**
46 * Subclass constructor allows subclasses to provide protocol,
47 * to be handled.
48 */
49 protected ParsedURLDefaultProtocolHandler(String protocol) {
50 super(protocol);
51 }
52
53 /**
54 * Subclasses can override these method to construct alternate
55 * subclasses of ParsedURLData.
56 */
57 protected ParsedURLData constructParsedURLData() {
58 return new ParsedURLData();
59 }
60
61 /**
62 * Subclasses can override these method to construct alternate
63 * subclasses of ParsedURLData.
64 * @param url the java.net.URL class we reference.
65 */
66 protected ParsedURLData constructParsedURLData(URL url) {
67 return new ParsedURLData(url);
68 }
69
70 /**
71 * Parses the string and returns the results of parsing in the
72 * ParsedURLData object.
73 * @param urlStr the string to parse as a URL.
74 */
75 public ParsedURLData parseURL(String urlStr) {
76 try {
77 URL url = new URL(urlStr);
78 // System.err.println("System Parse: " + urlStr);
79 return constructParsedURLData(url);
80 } catch (MalformedURLException mue) {
81 // Built in URL wouldn't take it...
82 // mue.printStackTrace();
83 }
84
85 // new Exception("Custom Parse: " + urlStr).printStackTrace();
86 // System.err.println("Custom Parse: " + urlStr);
87
88 ParsedURLData ret = constructParsedURLData();
89
90 if (urlStr == null) return ret;
91
92 int pidx=0, idx;
93 int len = urlStr.length();
94
95 // Pull fragment id off first...
96 idx = urlStr.indexOf('#');
97 ret.ref = null;
98 if (idx != -1) {
99 if (idx+1 < len)
100 ret.ref = urlStr.substring(idx+1);
101 urlStr = urlStr.substring(0,idx);
102 len = urlStr.length();
103 }
104
105 if (len == 0)
106 return ret;
107
108 // Protocol is only allowed to include -+.a-zA-Z
109 // So as soon as we hit something else we know we
110 // are done (if it is a ':' then we have protocol otherwise
111 // we don't.
112 idx = 0;
113 char ch = urlStr.charAt(idx);
114 while ((ch == '-') ||
115 (ch == '+') ||
116 (ch == '.') ||
117 ((ch >= 'a') && (ch <= 'z')) ||
118 ((ch >= 'A') && (ch <= 'Z'))) {
119 idx++;
120 if (idx == len) {
121 ch=0;
122 break;
123 }
124 ch = urlStr.charAt(idx);
125 }
126
127 if (ch == ':') {
128 // Has a protocol spec...
129 ret.protocol = urlStr.substring(pidx, idx).toLowerCase();
130 pidx = idx+1; // Skip ':'
131 }
132
133 // See if we have host/port spec.
134 idx = urlStr.indexOf('/');
135 if ((idx == -1) || ((pidx+2<len) &&
136 (urlStr.charAt(pidx) == '/') &&
137 (urlStr.charAt(pidx+1) == '/'))) {
138 // No slashes (apache.org) or a double slash
139 // (//apache.org/....) so
140 // we should have host[:port] before next slash.
141 if (idx != -1)
142 pidx+=2; // Skip double slash...
143
144 idx = urlStr.indexOf('/', pidx); // find end of host:Port spec
145 String hostPort;
146 if (idx == -1)
147 // Just host and port nothing following...
148 hostPort = urlStr.substring(pidx);
149 else
150 // Path spec follows...
151 hostPort = urlStr.substring(pidx, idx);
152
153 int hidx = idx; // Remember location of '/'
154
155 // pull apart host and port number...
156 idx = hostPort.indexOf(':');
157 ret.port = -1;
158 if (idx == -1) {
159 // Just Host...
160 if (hostPort.length() == 0)
161 ret.host = null;
162 else
163 ret.host = hostPort;
164 } else {
165 // Host and port
166 if (idx == 0) ret.host = null;
167 else ret.host = hostPort.substring(0,idx);
168
169 if (idx+1 < hostPort.length()) {
170 String portStr = hostPort.substring(idx+1);
171 try {
172 ret.port = Integer.parseInt(portStr);
173 } catch (NumberFormatException nfe) {
174 // bad port leave as '-1'
175 }
176 }
177 }
178 if (((ret.host == null) || (ret.host.indexOf('.') == -1)) &&
179 (ret.port == -1))
180 // no '.' in a host spec??? and no port, probably
181 // just a path.
182 ret.host = null;
183 else
184 pidx = hidx;
185 }
186
187 if ((pidx == -1) || (pidx >= len)) return ret; // Nothing follows
188
189 ret.path = urlStr.substring(pidx);
190 return ret;
191 }
192
193 public static String unescapeStr(String str) {
194 int idx = str.indexOf('%');
195 if (idx == -1) return str; // quick out..
196
197 int prev=0;
198 StringBuffer ret = new StringBuffer();
199 while (idx != -1) {
200 if (idx != prev)
201 ret.append(str.substring(prev, idx));
202
203 if (idx+2 >= str.length()) break;
204 prev = idx+3;
205 idx = str.indexOf('%', prev);
206
207 int ch1 = charToHex(str.charAt(idx+1));
208 int ch2 = charToHex(str.charAt(idx+1));
209 if ((ch1 == -1) || (ch2==-1)) continue;
210 ret.append((char)(ch1<<4 | ch2));
211 }
212
213 return ret.toString();
214 }
215
216 public static int charToHex(int ch) {
217 switch(ch) {
218 case '0': case '1': case '2': case '3': case '4':
219 case '5': case '6': case '7': case '8': case '9':
220 return ch-'0';
221 case 'a': case 'A': return 10;
222 case 'b': case 'B': return 11;
223 case 'c': case 'C': return 12;
224 case 'd': case 'D': return 13;
225 case 'e': case 'E': return 14;
226 case 'f': case 'F': return 15;
227 default: return -1;
228 }
229 }
230
231 /**
232 * Parses the string as a sub URL of baseURL, and returns the
233 * results of parsing in the ParsedURLData object.
234 * @param baseURL the base url for parsing.
235 * @param urlStr the string to parse as a URL.
236 */
237 public ParsedURLData parseURL(ParsedURL baseURL, String urlStr) {
238 // Reference to same document (including fragment, and query).
239 if (urlStr.length() == 0)
240 return baseURL.data;
241
242 // System.err.println("Base: " + baseURL + "\n" +
243 // "Sub: " + urlStr);
244
245 int idx = 0, len = urlStr.length();
246 if (len == 0) return baseURL.data;
247
248 // Protocol is only allowed to include -+.a-zA-Z
249 // So as soon as we hit something else we know we
250 // are done (if it is a ':' then we have protocol otherwise
251 // we don't.
252 char ch = urlStr.charAt(idx);
253 while ((ch == '-') ||
254 (ch == '+') ||
255 (ch == '.') ||
256 ((ch >= 'a') && (ch <= 'z')) ||
257 ((ch >= 'A') && (ch <= 'Z'))) {
258 idx++;
259 if (idx == len) {
260 ch=0;
261 break;
262 }
263 ch = urlStr.charAt(idx);
264 }
265 String protocol = null;
266 if (ch == ':') {
267 // Has a protocol spec...
268 protocol = urlStr.substring(0, idx).toLowerCase();
269 }
270
271 if (protocol != null) {
272 // Temporary if we have a protocol then assume absolute
273 // URL. Technically this is the correct handling but much
274 // software supports relative URLs with a protocol that
275 // matches the base URL's protocol.
276 // if (true)
277 // return parseURL(urlStr);
278 if (!protocol.equals(baseURL.getProtocol()))
279 // Different protocols, assume absolute URL ignore base...
280 return parseURL(urlStr);
281
282 // Same protocols, if char after ':' is a '/' then it's
283 // still absolute...
284 idx++;
285 if (idx == urlStr.length())
286 // Just a Protocol???
287 return parseURL(urlStr);
288
289 if (urlStr.charAt(idx) == '/')
290 // Absolute URL...
291 return parseURL(urlStr);
292
293 // Still relative just drop the protocol (we will pick it
294 // back up from the baseURL later...).
295 urlStr = urlStr.substring(idx);
296 }
297
298 if (urlStr.startsWith("/")) {
299 if ((urlStr.length() > 1) &&
300 (urlStr.charAt(1) == '/')) {
301 // Relative but only uses protocol from base
302 return parseURL(baseURL.getProtocol() + ":" + urlStr);
303 }
304 // Relative 'absolute' path, uses protocol and authority
305 // (host) from base
306 return parseURL(baseURL.getPortStr() + urlStr);
307 }
308
309 if (urlStr.startsWith("#")) {
310 String base = baseURL.getPortStr();
311 if (baseURL.getPath() != null) base += baseURL.getPath();
312 return parseURL(base + urlStr);
313 }
314
315 String path = baseURL.getPath();
316 // No path? well we will treat this as being relative to it's self.
317 if (path == null) path = "";
318 idx = path.lastIndexOf('/');
319 if (idx == -1) {
320 // baseURL is just a filename (in current dir) so use current dir
321 // as base of new URL.
322 path = "";
323 } else {
324 path = path.substring(0,idx+1);
325 if (urlStr.startsWith(path)) {
326 urlStr = urlStr.substring(path.length());
327 }
328 }
329
330 // System.err.println("Base Path: " + path);
331 // System.err.println("Base PortStr: " + baseURL.getPortStr());
332 return parseURL(baseURL.getPortStr() + path + urlStr);
333 }
334 }
335
336