Clover

Clover Coverage Report

Coverage timestamp: Sun Mar 23 2008 08:24:39 GMT

FRAMES NO FRAMES SHOW HELP

Statistics for file ParsedURL.java:
Stmts:	104	LOC:	458	Total cmp:	37	Stmts/Method:	6.5
Branches:	68	NCLOC:	197	Cmp density:	0.5	Methods/Class:	16
Methods:	16			Avg method cmp:	3.25
Classes:	1
Filtered

Expand All

ParsedURL

Line # 100

Total Statements 104

Complexity 37

TOTAL Coverage 95.2%

0.95212764

No Tests

Collapse All

* Redistribution and use in source and binary forms, with or without

* modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this

* list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,

* this list of conditions and the following disclaimer in the documentation

* and/or other materials provided with the distribution.

* Neither the name of the University of Salford nor the names of its

* contributors may be used to endorse or promote products derived from this

* software without specific prior written permission.

* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

* POSSIBILITY OF SUCH DAMAGE.

* Redistribution and use in source and binary forms, with or without

* modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this

* list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,

* this list of conditions and the following disclaimer in the documentation

* and/or other materials provided with the distribution.

* 1. Neither the name of the University of Kent nor the names of its

* contributors may be used to endorse or promote products derived from this

* software without specific prior written permission.

* 2. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS

* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,

* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

* PURPOSE ARE DISCLAIMED.

* 3. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

* POSSIBILITY OF SUCH DAMAGE.

* 4. YOU AGREE THAT THE EXCLUSIONS IN PARAGRAPHS 2 AND 3 ABOVE ARE REASONABLE

* IN THE CIRCUMSTANCES. IN PARTICULAR, YOU ACKNOWLEDGE (1) THAT THIS

* SOFTWARE HAS BEEN MADE AVAILABLE TO YOU FREE OF CHARGE, (2) THAT THIS

* SOFTWARE IS NOT "PRODUCT" QUALITY, BUT HAS BEEN PRODUCED BY A RESEARCH

* GROUP WHO DESIRE TO MAKE THIS SOFTWARE FREELY AVAILABLE TO PEOPLE WHO WISH

* TO USE IT, AND (3) THAT BECAUSE THIS SOFTWARE IS NOT OF "PRODUCT" QUALITY

* IT IS INEVITABLE THAT THERE WILL BE BUGS AND ERRORS, AND POSSIBLY MORE

* SERIOUS FAULTS, IN THIS SOFTWARE.

* 5. This license is governed, except to the extent that local laws

* necessarily apply, by the laws of England and Wales.

package issrg.utils;

import java.util.Vector;

/**
 * This class splits a URL into its components and offers a normalised form
 * of it. No character transformation is performed (e.g. "%20" stays as it is).
 *
 * <p>It handles only HTTP-like URLs:
 * <code>[url:] protocol :// [[username [: password] @] host [: port]] [/ [path]] [# anchor] [? query]</code>
 *
 * <p>The host is optional, so file: URLs are acceptable (getHost() then
 * returns null). Note that in this grammar the anchor precedes the query; if
 * a '?' appears before a '#', everything after the '?' (including the '#'
 * part) is reported as the query.
 *
 * <p>Every component may be null if it is missing from the URL, except the
 * path: a missing path is an array with 0 elements. An empty path element
 * (produced by "//" or by a trailing '/') is represented as ".", so a path
 * ending with '/' has "." as its last element.
 *
 * @author A.Otenko
 */
public class ParsedURL {
  private String url;
  private String protocol;
  private String userName;
  private String password;
  private String host;
  private String port;
  private String [] path;
  private String anchor;
  private String query;

  String [] normalisedPath;
  String normalisedURL;
  String pathString;

  /**
   * Creates an empty instance; all components, including both path arrays,
   * are left null.
   */
  protected ParsedURL(){}

  /**
   * This constructor builds a ParsedURL given the original URL and parts of it.
   * Any part can be null, except the path. It also computes the normalised
   * path, its String form and the normalised URL.
   *
   * @param url - the original URL from which the parts were obtained
   * @param protocol - the protocol extracted from the original URL
   * @param userName - the user name as it appears in the URL
   * @param password - the password as it appears in the URL
   * @param host - the host name as it appears in the URL
   * @param port - the port specification String; may be an integer, but
   *   sometimes it is more than that (e.g. a range of ports)
   * @param path - the array of path elements; cannot be null (nor contain
   *   null elements), but can be empty
   * @param anchor - the anchor String (everything after "#" and before the
   *   query String)
   * @param query - the query String (everything after "?")
   *
   * @throws NullPointerException if path or one of its elements is null
   */
  protected ParsedURL(String url, String protocol, String userName, String password,
      String host, String port, String [] path, String anchor, String query){
    if (path == null) {
      throw new NullPointerException("path must not be null");
    }

    this.url=url;
    this.protocol=protocol;
    this.userName=userName;
    this.password=password;
    this.host=host;
    this.port=port;
    this.path=path;
    this.anchor=anchor;
    this.query=query;

    // Normalise the path using a fixed-size array as a stack: the result can
    // never be longer than the original path.
    String [] stack = new String[path.length];
    int depth = 0;
    for (int i=0; i<path.length; i++){
      // intern() is retained from the original implementation so that the
      // identity of the stored elements does not change for existing callers;
      // the comparisons below no longer depend on it.
      String pathElement = path[i].intern();
      boolean isLast = i==path.length-1;

      if (".".equals(pathElement) && !isLast) {
        continue; // skip a lonely ".", but keep the trailing one
      }
      if ("..".equals(pathElement)) {
        if (depth>0) {
          depth--; // drop the previous element, if there is one
        }
        continue;
      }
      stack[depth++] = pathElement;
    }

    normalisedPath = new String[depth];
    System.arraycopy(stack, 0, normalisedPath, 0, depth);

    StringBuffer pathBuffer = new StringBuffer();
    for (int i=0; i<normalisedPath.length; i++){
      pathBuffer.append('/').append(normalisedPath[i]);
    }
    pathString = pathBuffer.toString();

    // The getters are used (as in the original code) so that the values a
    // subclass reports are the ones that end up in the normalised URL.
    StringBuffer normalised = new StringBuffer();
    normalised.append(getProtocol()).append("://");

    String hostPart = getHost();
    if (hostPart != null) { // user name, password and port are ignored without a host
      String userPart = getUserName();
      if (userPart != null) {
        normalised.append(userPart);
        String passwordPart = getPassword();
        if (passwordPart != null) {
          normalised.append(':').append(passwordPart);
        }
        normalised.append('@');
      }
      normalised.append(hostPart);
      String portPart = getPort();
      if (portPart != null) {
        normalised.append(':').append(portPart);
      }
    }

    normalised.append(pathString); // normalised path goes here

    String anchorPart = getAnchor();
    if (anchorPart != null) {
      normalised.append('#').append(anchorPart);
    }
    String queryPart = getQuery();
    if (queryPart != null) {
      normalised.append('?').append(queryPart);
    }
    normalisedURL = normalised.toString();
  }

  /**
   * @return the protocol of the URL as it has been provided to the constructor
   */
  public String getProtocol(){
    return protocol;
  }

  /**
   * @return the original URL as it has been provided to the constructor
   */
  public String getURL(){
    return url;
  }

  /**
   * @return the user name as it has been provided to the constructor
   */
  public String getUserName(){
    return userName;
  }

  /**
   * @return the password as it has been provided to the constructor
   */
  public String getPassword(){
    return password;
  }

  /**
   * @return the host name as it has been provided to the constructor
   */
  public String getHost(){
    return host;
  }

  /**
   * @return the port string as it has been provided to the constructor
   */
  public String getPort(){
    return port;
  }

  /**
   * This method returns the normalised path (excessive "." and ".." are removed).
   *
   * @return array of strings, representing the path; no ".." is there, and
   *   only the last element may be a "." (e.g. when the URL ends with a "/",
   *   meaning that the previous name in the path is a directory). The
   *   internal array is returned, not a copy.
   */
  public String [] getPath(){
    return normalisedPath;
  }

  /**
   * This method returns the path as it has been provided to the constructor
   * ("." and ".." are possible).
   *
   * @return array of strings, representing the path; if no excessive "." or ".."
   *   were used, it has the same elements as getPath(). The internal array is
   *   returned, not a copy.
   */
  public String [] getOriginalPath(){
    return path;
  }

  /**
   * This method returns the normalised path as a String.
   *
   * @return every element of the normalised path, each preceded by a '/';
   *   an empty String if the normalised path has no elements
   */
  public String getPathString(){
    return pathString;
  }

  /**
   * @return the anchor string as it has been provided to the constructor
   */
  public String getAnchor(){
    return anchor;
  }

  /**
   * @return the query string as it has been provided to the constructor
   */
  public String getQuery(){
    return query;
  }

  /**
   * This method returns a normalised URL (i.e. the path is without excessive
   * '.' and '..' elements, and a leading "url:" prefix is dropped).
   *
   * @return the normalised URL computed by the constructor
   */
  public String getNormalizedURL(){
    return normalisedURL;
  }

  /**
   * This method parses a URL string, and returns a ParsedURL object, if
   * succeeded. It returns null, if the URL is not valid.
   *
   * <p>Valid URLs correspond to the following syntax:
   * <code>[url:]protocol :// [[username [: password]@] host [: port]] [/ [path]] [# [anchor]] [? [query]]</code>
   *
   * <p>In particular, null is returned if the "://" separator or the protocol
   * is missing, if a prefix other than "url:" precedes the protocol, if
   * '@' or ':' in the host part is not surrounded by the values it separates,
   * or if an anchor is given without any path.
   *
   * @param url is a string encoding of the URL; no character transformation is
   *   done, e.g. %20 remains itself, and is not substituted by a space
   *
   * @return ParsedURL object, if parse succeeded, or null, if parse failed.
   */
  public static ParsedURL parseURL(String url){
    // "://" is a mandatory part of the syntax; without this check a plain
    // word like "foo" would be accepted as a protocol with nothing after it.
    if (url == null || url.indexOf("://") < 0) {
      return null;
    }

    String [] r = split(url, "://"); // r[0] is [url:]protocol, r[1] is the rest of the URL
    if (r[0] == null) {
      return null; // nothing in front of "://"
    }
    String rest = r[1]; // null, if nothing follows "://"

    String protocol;
    r = split(r[0], ":"); // r[0] is "url" or the protocol; r[1] is the protocol or null
    if (r[1] == null) {
      protocol = r[0];
    } else {
      if (r[0] == null || !r[0].equalsIgnoreCase("url")) {
        return null; // only "url:" may precede the protocol
      }
      protocol = r[1];
    }

    String userName = null;
    String password = null;
    String host = null;
    String port = null;
    String anchor = null;
    String query = null;
    String [] path = new String[0];

    if (rest != null) {
      // Find where the anchor/query tail starts: at '#' or '?', whichever
      // comes first.
      int anchorPos = rest.indexOf('#');
      int tailPos = rest.indexOf('?');
      if (tailPos < 0 || (anchorPos >= 0 && anchorPos < tailPos)) {
        tailPos = anchorPos;
      }

      if (tailPos >= 0) { // position 0 counts too: the tail may directly follow "://"
        r = split(rest.substring(tailPos), "?"); // r[0] is "#anchor" or null; r[1] is the query or null

        if (r[0] != null && !r[0].equals("#")) { // a lonely '#' means there is no anchor
          anchor = r[0].substring(1); // skip '#'
        }
        query = r[1];

        rest = rest.substring(0, tailPos);
      }

      // now rest has the user:psw@host:port/path bit

      r = split(rest, "/"); // r[0] is the host definition, or null (e.g. a file: URL)
      String authority = r[0];
      String p = r[1]; // the path without the initial '/', or null
      if (p == null && rest.endsWith("/")) {
        p = ""; // a single trailing '/': a path of one level, with empty name
      }

      if (authority != null) {
        r = split(authority, "@"); // r[0] is username:password or hostname:port; r[1] is hostname:port or null

        String hostPort;
        if (r[1] == null) {
          if (authority.endsWith("@")) {
            return null; // '@' is present, but no host follows it
          }
          hostPort = r[0];
        } else {
          hostPort = r[1];

          String credentials = r[0];
          if (credentials == null || credentials.endsWith(":")) {
            return null; // '@' is present, so username[:password] must be there, and must not end with ':'
          }

          r = split(credentials, ":");
          userName = r[0];
          password = r[1];
          if (userName == null) {
            return null; // username:password starts with ':'
          }
        }

        r = split(hostPort, ":"); // r[0] is the host, r[1] is the port or null
        if (r[0] == null || (r[1] == null && hostPort.endsWith(":"))) {
          return null; // no host in front of ':', or no port after it
        }
        host = r[0];
        port = r[1]; // no check that the port is a number: the string is only split into components
      }

      if (p != null) {
        // A negative limit keeps the trailing empty strings, so a trailing
        // '/' still produces a final element.
        path = p.split("/", -1);
        for (int i=0; i<path.length; i++){
          if (path[i].length() == 0) {
            path[i] = "."; // two subsequent slashes "//" are treated as "/./"
          }
        }
      }
    }

    if (path.length == 0 && anchor != null) {
      return null; // an anchor requires some path (at least a single '/')
    }

    return new ParsedURL(url, protocol, userName, password, host, port, path, anchor, query);
  }

  /**
   * This is a utility method that splits the string into two substrings,
   * having found a c string in it. The first substring is the string before
   * the first occurrence of c, the second substring is the string after the
   * first occurrence of c. If c is not found, the whole string is the first
   * substring.
   *
   * <p>If the substrings are empty, they will be null.
   *
   * <p>E.g. split("@", "@") == {null, null}<br>
   * split("://path", "://") == {null, "path"}<br>
   * split("http://host", "://") == {"http", "host"}<br>
   * split("host", "://") == {"host", null}
   *
   * @param u - the string to split; may be null, in which case both results
   *   are null
   * @param c - the separator to look for
   *
   * @return an array of exactly two elements, each of which may be null
   */
  private static String [] split(String u, String c){
    String [] r = new String[2];

    if (u != null){
      int j = u.indexOf(c);
      if (j < 0){
        r[0] = u.length()==0 ? null : u;
      } else {
        int afterSeparator = j + c.length();
        r[0] = j==0 ? null : u.substring(0, j);
        r[1] = afterSeparator < u.length() ? u.substring(afterSeparator) : null;
      }
    }

    return r;
  }
}