Clover Coverage Report
Coverage timestamp: Sun Mar 23 2008 08:24:39 GMT
104   458   37   6.5
68   197   0.5   16
16     3.25  
1    
 
 
  ParsedURL       Line # 100 104 37 95.2% 0.95212764
 
No Tests
 
1    /*
2    * Copyright (c) 2000-2005, University of Salford
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions are met:
7    *
8    * Redistributions of source code must retain the above copyright notice, this
9    * list of conditions and the following disclaimer.
10    *
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14    *
15    * Neither the name of the University of Salford nor the names of its
16    * contributors may be used to endorse or promote products derived from this
17    * software without specific prior written permission.
18    *
19    * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20    * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21    * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22    * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23    * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24    * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25    * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26    * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27    * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28    * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29    * POSSIBILITY OF SUCH DAMAGE.
30    */
31    /*
32    * Copyright (c) 2006, University of Kent
33    * All rights reserved.
34    *
35    * Redistribution and use in source and binary forms, with or without
36    * modification, are permitted provided that the following conditions are met:
37    *
38    * Redistributions of source code must retain the above copyright notice, this
39    * list of conditions and the following disclaimer.
40    *
41    * Redistributions in binary form must reproduce the above copyright notice,
42    * this list of conditions and the following disclaimer in the documentation
43    * and/or other materials provided with the distribution.
44    *
45    * 1. Neither the name of the University of Kent nor the names of its
46    * contributors may be used to endorse or promote products derived from this
47    * software without specific prior written permission.
48    *
49    * 2. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
50    * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
51    * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52    * PURPOSE ARE DISCLAIMED.
53    *
54    * 3. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
55    * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
56    * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
57    * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
58    * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
59    * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60    * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
61    * POSSIBILITY OF SUCH DAMAGE.
62    *
63    * 4. YOU AGREE THAT THE EXCLUSIONS IN PARAGRAPHS 2 AND 3 ABOVE ARE REASONABLE
64    * IN THE CIRCUMSTANCES. IN PARTICULAR, YOU ACKNOWLEDGE (1) THAT THIS
65    * SOFTWARE HAS BEEN MADE AVAILABLE TO YOU FREE OF CHARGE, (2) THAT THIS
66    * SOFTWARE IS NOT "PRODUCT" QUALITY, BUT HAS BEEN PRODUCED BY A RESEARCH
67    * GROUP WHO DESIRE TO MAKE THIS SOFTWARE FREELY AVAILABLE TO PEOPLE WHO WISH
68    * TO USE IT, AND (3) THAT BECAUSE THIS SOFTWARE IS NOT OF "PRODUCT" QUALITY
69    * IT IS INEVITABLE THAT THERE WILL BE BUGS AND ERRORS, AND POSSIBLY MORE
70    * SERIOUS FAULTS, IN THIS SOFTWARE.
71    *
72    * 5. This license is governed, except to the extent that local laws
73    * necessarily apply, by the laws of England and Wales.
74    */
75   
76   
77    package issrg.utils;
78   
79    import java.util.Vector;
80   
/**
 * This class splits a URL into its components and offers a normalised form
 * of the URL in which redundant "." and ".." path elements are removed.
 *
 * <p>It handles only HTTP-like URLs:
 *
 * <p><code>protocol : // [[username [: password] @] host [:port]] [/ [ path ] [# anchor] [? query]]</code>
 *
 * <p>Note that host is also an optional part of the URL, so file: URLs are
 * also acceptable (but getHost() will return null).
 *
 * <p>There are corresponding methods to get these values; for objects returned
 * by {@link #parseURL(String)} they can be null, if the value is missing. Only
 * the path is never null: it has 0 elements, if missing. An empty path element
 * is represented as ".", so a path ending with '/' has a "." as the last
 * element of the array.
 *
 * @author A.Otenko
 */
public class ParsedURL {
    private String url;
    private String protocol;
    private String userName;
    private String password;
    private String host;
    private String port;
    private String [] path;
    private String anchor;
    private String query;

    // The normalised values below are computed by the full constructor only.
    String [] normalisedPath;
    String normalisedURL;
    String pathString;

    /**
     * Creates an empty ParsedURL; every field, including the normalised path,
     * is left null.
     */
    protected ParsedURL(){}

    /**
     * This constructor builds a ParsedURL given the original URL and parts of it.
     * Any part can be null, except the path. The normalised path, the normalised
     * path string and the normalised URL are computed here.
     *
     * @param url - the original URL from which the parts were obtained
     * @param protocol - the protocol extracted from the original URL
     * @param userName - the user name as it appears in the URL
     * @param password - the password as it appears in the URL
     * @param host - the host name as it appears in the URL
     * @param port - the port specification String; may be an integer, but
     *   sometimes it is more than that (e.g. a range of ports)
     * @param path - the array of path elements; cannot be null (nor contain
     *   null elements), but can be empty
     * @param anchor - the anchor String (everything after "#" and before the
     *   query String)
     * @param query - the query String (everything after "?")
     *
     * @throws NullPointerException if path or any of its elements is null
     */
    protected ParsedURL(String url, String protocol, String userName, String password,
            String host, String port, String [] path, String anchor, String query){
        this.url=url;
        this.protocol=protocol;
        this.userName=userName;
        this.password=password;
        this.host=host;
        this.port=port;
        this.path=path;
        this.anchor=anchor;
        this.query=query;

        // Normalise the path; the array is used as a stack of the elements kept so far.
        String [] kept = new String[path.length];
        int depth = 0;
        for (int i=0; i<path.length; i++){
            // Elements are still interned, as they always were.
            // NOTE(review): other code may compare path elements by identity -
            // confirm before dropping the intern() call.
            String pathElement = path[i].intern();

            if (".".equals(pathElement) && i<path.length-1) continue; // skip lonely ".", don't skip the trailing "."

            if ("..".equals(pathElement)) {
                if (depth>0) depth--; // remove the last path element, if there is one to remove
                continue;
            }

            // we are here only if the path element is neither a skipped "." nor a ".."
            kept[depth++] = pathElement;
        }

        normalisedPath = new String[depth];
        System.arraycopy(kept, 0, normalisedPath, 0, depth);

        StringBuffer sb = new StringBuffer();
        for (int i=0; i<normalisedPath.length; i++){
            sb.append('/').append(normalisedPath[i]);
        }
        pathString=sb.toString();

        // The getters (not the fields) are used deliberately, as before, so that
        // the result is unchanged for subclasses overriding them.
        StringBuffer normalised = new StringBuffer();
        normalised.append(getProtocol()).append("://");
        if (getHost()!=null){ // user name, password and port are only output along with a host
            if (getUserName()!=null){
                normalised.append(getUserName());
                if (getPassword()!=null) normalised.append(':').append(getPassword());
                normalised.append('@');
            }
            normalised.append(getHost());
            if (getPort()!=null) normalised.append(':').append(getPort());
        }
        normalised.append(pathString); // normalised path is here
        if (getAnchor()!=null) normalised.append('#').append(getAnchor());
        if (getQuery()!=null) normalised.append('?').append(getQuery());
        normalisedURL = normalised.toString();
    }

    /**
     * @return the protocol of the URL as it has been provided to the constructor
     */
    public String getProtocol(){
        return protocol;
    }

    /**
     * @return the original URL as it has been provided to the constructor
     */
    public String getURL(){
        return url;
    }

    /**
     * @return the user name as it has been provided to the constructor
     */
    public String getUserName(){
        return userName;
    }

    /**
     * @return the password as it has been provided to the constructor
     */
    public String getPassword(){
        return password;
    }

    /**
     * @return the host name as it has been provided to the constructor
     */
    public String getHost(){
        return host;
    }

    /**
     * @return the port string as it has been provided to the constructor
     */
    public String getPort(){
        return port;
    }

    /**
     * This method returns the normalised path (excessive "." and ".." are removed).
     * The array returned is the internal one, not a copy.
     *
     * @return array of strings, representing the path; no ".." is there, and
     *   only the last element may be a "." if the URL ends with a "/", which means
     *   that the previous name in the path is a directory
     */
    public String [] getPath(){
        return normalisedPath;
    }

    /**
     * This method returns the path as it is in the URL ("." and ".." are
     * possible). The array returned is the one given to the constructor, not a
     * copy.
     *
     * @return array of strings, representing the path; if no excessive "." or ".."
     *   were used, it has the same contents as getPath()
     */
    public String [] getOriginalPath(){
        return path;
    }

    /**
     * This method returns the normalised path as a String: every element of
     * getPath() preceded by a "/".
     */
    public String getPathString(){
        return pathString;
    }

    /**
     * @return the anchor string as it has been provided to the constructor
     */
    public String getAnchor(){
        return anchor;
    }

    /**
     * @return the query string as it has been provided to the constructor
     */
    public String getQuery(){
        return query;
    }

    /**
     * This method returns a normalised URL (i.e.&nbsp;the path is without '.' and
     * '..' elements, etc.)
     */
    public String getNormalizedURL(){
        return normalisedURL;
    }

    /**
     * This method parses a URL string, and returns a ParsedURL object, if
     * succeeded.
     * It returns null, if URL is not valid.
     *
     * <p>Valid URLs correspond to the following syntax:
     * <p><code>[url:]protocol : // [username [: password]@] host [: port] [/ [path] [# [anchor]]] [? [query]]</code>
     *
     * <p>The host is optional. An anchor is only accepted if there is a path (at
     * least a single '/'). A string containing neither "://" nor ':' is not
     * rejected: it is treated as a protocol with nothing after it.
     *
     * @param url is a string encoding of the URL; no character transformation is
     *   done, e.g. %20 remains itself, and is not substituted by a space; may be
     *   null, in which case null is returned
     *
     * @return ParsedURL object, if parse succeeded, or null, if parse failed.
     */
    public static ParsedURL parseURL(String url){
        String protocol=null, userName=null, password=null, host=null, port=null,
               anchor=null, query=null;
        String [] path=new String[0];

        boolean valid=true; // cleared as soon as the URL is found to be malformed
        String u=url;

        // u may be null; it's ok, r will be an array of two null strings in a moment

        String [] r=split(u, "://"); // find url:protocol and the rest of the URL
        if (r[0]==null){ // not a URL
            valid=false;
        } else {
            u=r[1];
            r=split(r[0], ":"); // r[0] will become "url" or protocol or null (invalid URL)
                                // r[1] will become protocol or null
            if (r[1]==null){
                protocol=r[0];
            } else {
                protocol=r[1];
                if (r[0]==null || r[0].compareToIgnoreCase("url")!=0){
                    valid=false; // the only prefix allowed in front of the protocol is "url:"
                }
            }
        }
        // ok, now we've got the protocol; the rest of the URL is in u

        if (u!=null && valid){
            int j=u.indexOf('#');
            int k=u.indexOf('?');

            if (k<0 || (j>=0 && j<k)) k=j; // if there was no '?', or '#' anchor exists and is before '?' query, then anchor '#' defines the length of the meaningful URL

            // indexOf returns 0 when the very first character matches, so the
            // test must be k>=0: otherwise an anchor or a query that immediately
            // follows "://" would be mistaken for a host name
            if (k>=0){ // there was either anchor '#' or query '?' (or both)
                String tail = u.substring(k); // k identifies the position of '#' or '?', whichever is earlier

                r=split(tail, "?"); // now r[0] is anchor, if any, or null, if no anchor
                                    // r[1] is query, if any, or null, if no query

                if (r[0]!=null && !r[0].equals("#")){ // if anchor is present, it always starts with '#'; but if it is equal to '#', then there is no anchor
                    anchor=r[0].substring(1); // skip '#'
                }

                query=r[1];

                u=u.substring(0, k);
            }
            // ok, now if there were any query or anchor, they were extracted

            // now u has the user:psw@host:port/path bit

            r=split(u, "/"); // now r[0] is host definition, or null (no host in the URL - must be a file: URL)
                             // r[1] is path definition, or null (no path)

            String p=r[1];
            if (p==null && u.endsWith("/")){
                p=""; // there was a single trailing '/': a path of one level, with empty filename
            }

            if (r[0]!=null){ // host is present; r[0]==null => u starts with '/' (file: URL) or is empty
                r=split(r[0], "@"); // now r[0] is username:password, or hostname:port, or null (invalid URL: '@' was present, but nothing in front of it)
                                    // r[1] is hostname:port, or null

                if (r[1]==null){
                    // r[0] == hostname:port
                    u=r[0]; // could be null, but this will be detected when splitting hostname and port
                } else {
                    // r[1] == hostname:port, r[0] == username:password
                    u=r[1];

                    if (r[0]==null || r[0].endsWith(":")){ // username:password cannot end with ":", and cannot be null ('@' is present)
                        valid=false;
                    } else {
                        r=split(r[0], ":"); // now r[0] is username, or null (invalid URL)
                                            // r[1] is password, or null

                        userName=r[0];
                        password=r[1];

                        if (userName==null){ // username:password starts with ":" - bad URL
                            valid=false;
                        }
                    }
                }
                // ok, now username and password have been extracted;
                // u is hostname:port

                r=split(u, ":"); // now r[0] is host, or null (invalid URL)
                                 // r[1] is port, or null

                if (r[0]==null || (r[1]==null && u.endsWith(":"))) { // no host, or "host:" without a port
                    valid=false;
                } else {
                    host=r[0];
                    port=r[1]; // note that no checks if port is a number: here we only split the string into components
                }
            }
            // ok, now parsed username:password@host:port

            if (p!=null){
                // there is no initial '/' in p
                p+="/"; // make every path component end with a '/', so that a simple loop can
                        // deal with them; the final iteration results
                        // in split(p, "/") == {lastPathComponent, null}

                Vector pathElements = new Vector();

                while (p!=null) { // at least one iteration is performed
                    r=split(p, "/");
                    if (r[0]==null) r[0]="."; // two subsequent slashes "//" are treated as "/./"

                    pathElements.add(r[0]);
                    p=r[1];
                }

                path = (String [])pathElements.toArray(new String[0]);
            }
        }

        if (!valid || (path.length==0 && anchor!=null)) return null; // not a valid URL

        return new ParsedURL(url, protocol, userName, password, host, port, path, anchor, query);
    }

    /**
     * This is a utility method that splits the string into two substrings, having
     * found a c string in it. The first substring is the string before the first
     * occurrence of c, the second substring is the string after the first
     * occurrence of c. If c does not occur in the string, the whole string is the
     * first substring, and the second one is null.
     *
     * <p>If the substrings are empty, they will be null. If the string itself is
     * null, both substrings are null.
     *
     * <p>E.g. split("@", "@") == {null, null}
     * <br>split("://path", "://") == {null, "path"}
     * <br>split("http://host", "://") == {"http", "host"}
     *
     * @param u - the string to split; may be null
     * @param c - the separator to look for; must not be null
     *
     * @return an array of exactly two elements
     */
    private static String [] split(String u, String c){
        String [] r = new String[2];

        if (u!=null){
            int j=u.indexOf(c);
            if (j<0){
                r[0]=u.length()==0? null: u;
            } else {
                int after=j+c.length(); // index of the first character following the separator
                r[0]=j==0? null: u.substring(0, j);
                r[1]=after<u.length()? u.substring(after): null;
            }
        }

        return r;
    }
}