Clover Coverage Report
Coverage timestamp: Sun Mar 23 2008 08:24:39 GMT
278   1,140   97   8.42
146   470   0.46   33
33     3.91  
1    
 
 
  RFC2253NameParser       Line # 116 278 97 88% 0.8796499
 
  (1)
 
1    /*
2    * Copyright (c) 2000-2005, University of Salford
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions are met:
7    *
8    * Redistributions of source code must retain the above copyright notice, this
9    * list of conditions and the following disclaimer.
10    *
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14    *
15    * Neither the name of the University of Salford nor the names of its
16    * contributors may be used to endorse or promote products derived from this
17    * software without specific prior written permission.
18    *
19    * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20    * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21    * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22    * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23    * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24    * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25    * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26    * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27    * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28    * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29    * POSSIBILITY OF SUCH DAMAGE.
30    */
31    /*
32    * Copyright (c) 2006, University of Kent
33    * All rights reserved.
34    *
35    * Redistribution and use in source and binary forms, with or without
36    * modification, are permitted provided that the following conditions are met:
37    *
38    * Redistributions of source code must retain the above copyright notice, this
39    * list of conditions and the following disclaimer.
40    *
41    * Redistributions in binary form must reproduce the above copyright notice,
42    * this list of conditions and the following disclaimer in the documentation
43    * and/or other materials provided with the distribution.
44    *
45    * 1. Neither the name of the University of Kent nor the names of its
46    * contributors may be used to endorse or promote products derived from this
47    * software without specific prior written permission.
48    *
49    * 2. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
50    * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
51    * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
52    * PURPOSE ARE DISCLAIMED.
53    *
54    * 3. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
55    * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
56    * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
57    * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
58    * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
59    * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
60    * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
61    * POSSIBILITY OF SUCH DAMAGE.
62    *
63    * 4. YOU AGREE THAT THE EXCLUSIONS IN PARAGRAPHS 2 AND 3 ABOVE ARE REASONABLE
64    * IN THE CIRCUMSTANCES. IN PARTICULAR, YOU ACKNOWLEDGE (1) THAT THIS
65    * SOFTWARE HAS BEEN MADE AVAILABLE TO YOU FREE OF CHARGE, (2) THAT THIS
66    * SOFTWARE IS NOT "PRODUCT" QUALITY, BUT HAS BEEN PRODUCED BY A RESEARCH
67    * GROUP WHO DESIRE TO MAKE THIS SOFTWARE FREELY AVAILABLE TO PEOPLE WHO WISH
68    * TO USE IT, AND (3) THAT BECAUSE THIS SOFTWARE IS NOT OF "PRODUCT" QUALITY
69    * IT IS INEVITABLE THAT THERE WILL BE BUGS AND ERRORS, AND POSSIBLY MORE
70    * SERIOUS FAULTS, IN THIS SOFTWARE.
71    *
72    * 5. This license is governed, except to the extent that local laws
73    * necessarily apply, by the laws of England and Wales.
74    */
75   
76    package issrg.utils;
77   
78    import java.util.Vector;
79    import java.text.StringCharacterIterator;
80    import java.text.CharacterIterator;
81   
82    /**
83    * This is an implementation of an RFC2253 LDAP DN parser. Many existing parsers
84    * are
85    * burdened with semantic interpretation, so the parsers look after the
86    * attribute
87    * type names and OIDs. This is not so useful in this developing world. Many
88    * people
89    * invent their own attributes, just for their own LDAP directory, and such
90    * parsers
91    * will obviously fail. However, it would be so simple to just return the DN
92    * divided
93    * into RDNs, each of them separated into AVAs with attribute value represented
94    * as an unescaped String or a binary value. Let the caller cope with the
95    * attribute
96    * type names and OIDs!
97    *
98    * <p>Note also, that due to a common misconception people often join RDNs,
99    * separating them with comma-space, instead of just comma, as is specified in
100    * the RFC.
101    *
102    * <p>Also I want to share some ambiguity I find in the said RFC (not the only
103    * ambiguity found, though!): it says to join the RDNs in reverse order, but
104    * it does not tell to <i>parse</i> them in the reverse order; which means that
105    * the DN
106    * will be reversed after each parse-compose operation.
107    *
108    * <p>This parser supports OSF-syntax DNs as well, but not all of such DNs can
109    * be represented in RFC2253 form (a valid OSF DN "/C=gb/etc" cannot be
110    * converted
111    * into RFC2253 DN, because the last component doesn't have an attribute type).
112    *
113    * @author A.Otenko
114    */
115   
 
116    public class RFC2253NameParser {
117    /**
118    * These are some stupid definitions, just to make sure we are using
119    * the specified characters by their ASCII code.
120    */
121    private final static char A_CHAR = 65;
122    private final static char a_CHAR = 97;
123    private final static char F_CHAR = 70;
124    private final static char f_CHAR = 95;
125   
126    private final static char X_CHAR = 88;
127    private final static char x_CHAR = 120;
128   
129    private final static char Z_CHAR = 90;
130    private final static char z_CHAR = 122;
131   
132    private final static char NINE_CHAR = 57;
133    private final static char ZERO_CHAR = 48;
134   
135    private final static char LT_CHAR = '<'; // the RFC does not seem to specify the codes for these. of course i can look it up, but i don't see any particular reason for that.
136    private final static char GT_CHAR = '>';
137   
138    public final static char ASSIGN_CHAR = 61; // you may want to use these
139    public final static char COMMA_CHAR = 44; // when constructing the DN
140    private final static char HYPHEN_CHAR = 45;
141    public final static char PLUS_CHAR = 43; // out of String [][][]
142    private final static char SEMICOLON_CHAR = 59;
143   
144    private final static char APOSTROPHE_CHAR = 39; // ', used in OSF to quote chars
145    private final static char BSLASH_CHAR = 92;
146    private final static char HASH_CHAR = 35;
147    private final static char SLASH_CHAR = 47; // just ordinary Slash '/'
148    private final static char SPACE_CHAR = 32;
149    private final static char QUOTE_CHAR = 34;
150   
151    private final static char DOT_CHAR = 46;
152    private final static char O_CHAR = 79;
153    private final static char I_CHAR = 73;
154    private final static char D_CHAR = 68;
155    private final static char o_CHAR = 111;
156    private final static char i_CHAR = 105;
157    private final static char d_CHAR = 100;
158   
159    /**
160    * The starting non-terminal, distinguishedName.
161    * <p><code>distinguishedName = [name]</code>
162    *
163    * @param Name a string to be parsed into a Distinguished Name
164    * @return returns an array of arrays of AVA, each of them being an
165    * array
166    * of two strings, the first being the name or the OID of the
167    * attribute, as specified in the DN, and the second being the
168    * value of the attribute; see also {@link #toCanonicalDN(String[][][]) toCanonicalName} method.
169    *
170    * Note also, that the method simply returns an array of arrays of
171    * AttributeValueAssertions. The latter is an array of two strings:
172    * attribute type and attribute value; not to mess with extra classes.
173    *
174    * @throws RFC2253ParsingException, which will always contain a nested
175    * exception. The exception contains basic information: at what
176    * position the parsing error occurred, and there is not much
177    * use in printing its stack. The details of the fault are
178    * contained in the nested exception, and the actual error point
179    * in the code as well, if you want to print the stack trace.
180    * The code does not throw any other exceptions, even run-time
181    * ones, except for IllegalArgumentException, which may occur in
182    * case
183    * a null string is passed as a Name parameter.
184    */
 
185  12854 toggle public static String [][][] distinguishedName(String Name) throws RFC2253ParsingException{
186  12854 if (Name==null){
187  0 throw new IllegalArgumentException("Name parameter cannot be null");
188    }
189   
190  12854 Name=Name.trim();
191   
192  12854 if (Name.intern()==""){ // an empty DN
193  46 return new String[0][][]; // certainly, a more general way would be to try{ dn=name(ci); }catch(){} and check if it reached the end of string
194    }
195   
196    /* interesting, whether the RFC would allow a sequence of
197    * spaces to be a DN. there is no explicit statements, so I
198    * disallow that. note, that in fact there are some statements
199    * about spaces preceding the attribute names and values. but certainly
200    * the spaces alone do not precede _any_ attribute name, do they?..
201    */
202  12808 CharacterIterator ci = new StringCharacterIterator(Name);
203   
204  12808 if (Name.startsWith(new String(new char[]{SLASH_CHAR, DOT_CHAR, DOT_CHAR, DOT_CHAR, SLASH_CHAR}))){
205  1 ci.setIndex(4); // skip "/..." in OSF names
206    }
207   
208  12808 try{
209  12808 String [][][] dn = name(ci, Name.startsWith(new String(new char[]{SLASH_CHAR}))); // OSF names always start with a Slash, RC2253 cannot start with a slash
210   
211  12800 if (ci.getIndex()<ci.getEndIndex()){
212  2 throw new RFC2253ParsingException("End of Distinguished Name expected.");
213    }
214  12798 return dn;
215    }catch(Exception ex){ // now this code throws one exception only!
216  10 String msg = null;
217  10 int pos = 0;
218   
219  10 if (ci==null){
220  0 msg = "CharacterIterator is null";
221    }else{
222  10 pos = ci.getIndex();
223  10 msg = "Parse error at "+(pos);
224    }
225  10 throw new RFC2253ParsingException(pos, msg, ex);
226    }
227    }
228   
229    /**
230    * This method returns the canonical representation of the DN separated
231    * into arrays of strings.
232    *
233    * <p>It simply combines the parts of the dn in the following way: the
234    * attribute types
235    * in each AVA are converted to upper case (because some applications
236    * allow lowercase
237    * input of these), attribute values are taken as is, and then all AVAs
238    * in the same
239    * RDN are combined using "+" (type and value are joined with "="), after that all RDNs are concatenated
240    * using ",".
241    *
242    * <p>dn is an array of RDNs. RDN is an array of AVA (Attribute Value
243    * Assertion). AVA
244    * is an array of two strings (after parsing using distinguishedName
245    * method). The
246    * string with index 0 is the attribute type. The string with index 1
247    * is the attribute
248    * value. The value will be escaped using escapeString method.
249    *
250    * <p>If AVA is an array of more than two strings (the reference in RDN
251    * can be replaced
252    * by the user), and the string with index 2 is not null, it will be
253    * placed as the
254    * attribute value as is, instead of escaped string with index 1. This
255    * allows the user
256    * to provide the values of attributes that should be compared as
257    * binary (for example,
258    * "#<hexstring>" would match only this string of hexadecimal values).
259    *
260    * <p><code>
261    * String [][][] dn = distinguishedName("uid=aBc , c=gb");<br>
262    * String [][] uid_rdn = dn[0];<br>
263    * String [] uid_ava = uid_rdn[0];<br>
264    * <br>
265    * uid_rdn[0] = new String[]{ uid_ava[0], uid_ava[1], toHexString(uid_ava[1].getBytes()) };<br>
266    * <br>
267    * String canonicalDN = toCanonicalDN(dn);<br>
268    * </code>
269    *
270    * <p>In the example above the distinguished name will be parsed as an
271    * array of two RDNs,
272    * each of them having only one AVA. We are accessing the leftmost RDN.
273    * We are interested
274    * in replacing the AVA in it that corresponds to the "uid" attribute
275    * type, so the uid will
276    * be case sensitive ("aBc" is not the same as "ABC"). So we replace
277    * the corresponding
278    * AVA in the RDN with the new value - an array containing the
279    * user-defined string to
280    * be put in the RDN.
281    *
282    * <p>Note that in most cases conversion to the canonic DN will look
283    * like this:
284    *
285    * <p><code>
286    * String canonicalDN = toCanonicalDN(distinguishedName( nonCanonical ));<br>
287    * </code>
288    *
289    * @param dn is the parsed DN with the array format as described above
290    *
291    * @return String value containing the canonical RFC2253 DN
292    *
293    * @throws NullPointerException and IndexOutOfRange in case the dn is a
294    * malformed
295    * input (AVA is less than 2 elements, or null pointer encountered)
296    */
 
297  12821 toggle public static String toCanonicalDN(String [][][] dn){
298  12821 StringBuffer sb = new StringBuffer();
299   
300  65279 for (int i=0; i<dn.length; i++){
301  52458 if (i!=0){
302  39683 sb.append(COMMA_CHAR);
303    }
304   
305  104916 for (int j=0; j<dn[i].length; j++){
306  52458 if (j!=0){
307  0 sb.append(PLUS_CHAR);
308    }
309   
310  52458 sb.append(dn[i][j][0].toUpperCase()); // upcase the attribute type name
311  52458 sb.append(ASSIGN_CHAR);
312   
313  52458 String t;
314  52458 if (dn[i][j].length>2 && dn[i][j][2]!=null){
315  0 t=dn[i][j][2];
316    }else{
317  52458 t=escapeString(dn[i][j][1]);
318    }
319   
320  52458 sb.append(t);
321    }
322    }
323   
324  12821 return sb.toString();
325    }
326   
327    /**
328    * This method will attempt to convert a given DN to canonical DN.
329    * If it is not a DN, it will return null.
330    *
331    * <p>This is the same as calling toCanonicalDN(distinguishedName(dn)),
332    * but is more convenient, because it doesn't throw exceptions.
333    *
334    * @param dn - the DN to convert to canonical form; can be null
335    *
336    * @return the canonical representation of the DN, or null, if it is
337    * not a DN.
338    */
 
339  70 toggle public static String toCanonicalDN(String dn){
340  70 String canonical = null;
341  70 try{
342  70 if (dn!=null) canonical = toCanonicalDN(distinguishedName(dn));
343    }catch(RFC2253ParsingException rpe){ // ignore all parsing exceptions
344    }
345   
346  70 return canonical;
347    }
348   
349    /**
350    * The name non-terminal.
351    *
352    * @param n - the CharacterIterator where the current position points to
353    * a distinguished name
354    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
355    * is assumed
356    *
357    * @return a DN array of RDN arrays of AVA arrays
358    */
 
359  12808 toggle protected static String [][][] name(CharacterIterator n, boolean OSF) throws RFC2253ParsingException{
360  12808 Vector result = new Vector();
361   
362  12808 while (true){
363  52503 result.add(name_component(n, OSF));
364  52495 skip_spaces(n);
365   
366  52495 if ((!OSF && !COMMA(n.current())) || (OSF && !SLASH(n.current()))) {
367  12800 break; // no name-components will follow
368    }
369   
370  39695 if (!OSF) {
371  39690 n.next(); // eat the ','
372  39690 skip_spaces(n);
373    }
374    }
375   
376  12800 Object [] o = result.toArray();
377  12800 String [][][] s = new String[o.length][][];
378  65293 for (int i=0; i<s.length; i++){
379  52493 s[OSF?s.length-1-i:i] = (String [][])o[i]; // OSF names appear in the reverse order in the string - so they should be copied in reverse order
380    }
381   
382  12800 return s;
383    }
384   
385    /**
386    * The name-component non-terminal.
387    *
388    * @param ci - the CharacterIterator, where the current position points
389    * to a RDN
390    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
391    * is assumed
392    *
393    * @return an RDN array of AVA arrays
394    */
 
395  52503 toggle protected static String [][] name_component(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
396  52503 if (OSF) { // if OSF-style DN, slash and spaces were not skipped yet
397  10 ci.next();
398  10 skip_spaces(ci);
399    }
400   
401  52503 Vector result = new Vector();
402   
403  52503 while(true){
404  52506 result.add(attributeTypeAndValue(ci, OSF));
405  52498 skip_spaces(ci);
406   
407  52498 if ((!OSF && !PLUS(ci.current())) || (OSF && !COMMA(ci.current())))
408  52495 break; // no attributeTypeAndValues will follow
409   
410  3 ci.next();
411   
412  3 skip_spaces(ci);
413    }
414   
415  52495 Object [] o = result.toArray();
416  52495 String [][] s = new String[o.length][];
417  104993 for (int i=0; i<s.length; i++){
418  52498 s[i] = (String [])o[i];
419    }
420   
421  52495 return s;
422    }
423   
424    /**
425    * The attributeTypeAndValue non-terminal.
426    *
427    * @param ci - the CharacterIterator, where the current position points
428    * to an AVA
429    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
430    * is assumed
431    *
432    * @return an AVA array (two elements: at 0 - attribute type,
433    * at 1 or 2 - attribute value)
434    */
 
435  52506 toggle protected static String [] attributeTypeAndValue(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
436  52506 String [] pair = new String[2];
437   
438  52506 pair[0] = attributeType(ci, OSF);
439   
440  52506 skip_spaces(ci);
441   
442  52506 if (!ASSIGNMENT(ci.current()) || pair[0]==null){ // pair[0]==null only if it is OSF and there was no valid attribute Type
443  6 if (!OSF) throw new RFC2253ParsingException("Assignment mark ('"+ASSIGN_CHAR+"') expected");
444    } else {
445  52500 ci.next();
446  52500 skip_spaces(ci);
447    }
448   
449  52501 pair[1] = attributeValue(ci, OSF);
450  52498 if ( pair[1].startsWith("#") ){
451  2 pair = new String[] { pair[0], null, pair[1] };
452    }
453   
454  52498 return pair;
455    }
456   
457    /**
458    * The attributeType non-terminal. Seems, there is a typo in the RFC:
459    * <p><code>attributeType = (ALPHA 1*keychar) / oid</code>
460    * <p>should perhaps read
461    * <p><code>attributeType = (ALPHA *keychar) / oid</code>
462    * <p>Otherwise, attributeType L (locality) would not be accepted.
463    *
464    * @param ci - the CharacterIterator, where the current position points
465    * to an attribute type
466    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
467    * is assumed
468    *
469    * @return an attribute type as an "oid.<dotted OID>" or the type
470    */
 
471  52506 toggle protected static String attributeType(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
472  52506 StringBuffer result = new StringBuffer();
473   
474  52506 int currentPos = ci.getIndex();
475   
476  52506 try{
477  52506 result.append(oid(ci));
478    }catch(RFC2253ParsingException ex){
479  52501 ci.setIndex(currentPos); // restart from where the OID started
480   
481  52501 char c;
482  0 if (ALPHA(c=ci.current())){
483  52501 do{
484  86669 result.append(c);
485  0 }while(keychar(c=ci.next()));
486   
487  52501 if (OSF && !ASSIGNMENT(c)) { // if it is OSF name, '=' should follow immediately
488  1 ci.setIndex(currentPos); // if there was no '=', then it is just a value, without any attribute type
489    // restore the position from which parsing should continue
490  1 return null; // return null attribute type
491    }
492    }else{
493  0 if (OSF) return null; // non-alphanumeric character encountered - it must be an attribute value
494  0 throw new RFC2253ParsingException("attributeType expected");
495    }
496    }
497   
498  52505 return result.toString();
499    }
500   
501   
502    /**
503    * The keychar terminal. Someone could call it a non-terminal, but since
504    * it represents the smallest granularity of the input, it is a terminal.
505    *
506    * @param c - the character to test
507    *
508    * @return true, if it is a keychar (HYPHEN_CHAR or ALPHA(c) or
509    * DIGIT(c))
510    */
 
511  86669 toggle protected static boolean keychar(char c){
512  86669 return c==HYPHEN_CHAR || ALPHA(c) || DIGIT(c);
513    }
514   
515   
516    /**
517    * The oid non-terminal.
518    *
519    * @param ci - the CharacterIterator, where the current position points
520    * to an attribute type expressed as an OID
521    *
522    * @return the OID string without "oid." prefix
523    */
 
524  52506 toggle protected static String oid(CharacterIterator ci) throws RFC2253ParsingException{
525  52506 char [] OID_string = {O_CHAR, I_CHAR, D_CHAR, DOT_CHAR};
526  52506 char [] oid_string = {o_CHAR, i_CHAR, d_CHAR, DOT_CHAR};
527  52506 char c;
528  52506 illegal_oid:
529    do{ // this loop is needed only to break properly
530    // is it a good style of programming?
531    // ...reckoning that return statement
532   
533  52506 c=ci.current();
534   
535  52506 if (c==oid_string[0]){ // let's see if it starts with an "oid." string
536  12473 for (int i=0; i<oid_string.length; i++, ci.next()){
537  12468 if (ci.current()!=oid_string[i]){
538  6224 break illegal_oid;
539    }
540    }
541    }else{
542  46277 if (c==OID_string[0]){ // let's see if it starts with an "OID." string
543  40953 for (int i=0; i<OID_string.length; i++, ci.next()){
544  40952 if (ci.current()!=OID_string[i]){
545  20474 break illegal_oid;
546    }
547    }
548    }
549    }
550   
551  25808 StringBuffer result = new StringBuffer();
552  25808 if (!DIGIT(ci.current())) break illegal_oid;
553   
554  6 do{ result.append(ci.current()); }while(DIGIT(ci.next()));
555   
556  14 while(ci.current()==DOT_CHAR){
557  9 result.append(ci.current()); // add dot once
558   
559  9 if (!DIGIT(ci.next())) break illegal_oid;
560  8 do{ result.append(ci.current()); }while(DIGIT(ci.next()));
561    }
562   
563  5 return result.toString();
564   
565    }while(false);
566   
567  52501 throw new RFC2253ParsingException("Valid OID specification expected");
568    }
569   
570   
571    /**
572    * The attributeValue non-terminal.
573    *
574    * @param ci - the CharacterIterator, where the current position points
575    * to an attribute value
576    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
577    * is assumed
578    *
579    * @return attribute value
580    */
 
581  52501 toggle protected static String attributeValue(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
582  52501 skip_spaces(ci);
583   
584  52501 String s = string(ci, OSF);
585   
586  52498 skip_spaces(ci);
587   
588  52498 return s;
589   
590    }
591   
592    /**
593    * The string non-terminal. The specification is not quite clear about
594    * the trailing space characters. Is it still possible to have a value
595    * '\ hi hix\ '? I am implementing it as if it were possible, though,
596    * the syntax does not talk about escaping space in such a way.
597    *
598    * @param ci - the CharacterIterator, where the current position points
599    * to a string value of an attribute
600    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
601    * is assumed
602    *
603    * @return unescaped string value
604    */
 
605  52501 toggle protected static String string(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
606  52501 StringBuffer result = new StringBuffer();
607   
608  52501 char c = ci.current();
609  52501 if (c==HASH_CHAR && !OSF){
610  2 ci.next();
611   
612  2 return hexstring(ci); // this is faster a little bit
613    // than appending it to the result first
614    }else{
615  52499 if (QUOTATION(c, OSF)){
616  9 ci.next(); // eat it
617   
618  9 try{
619  0 while(!QUOTATION(c=ci.current(), OSF)){
620  30 if (quotechar(c, OSF)){
621  23 result.append(c);
622  23 ci.next();
623    }else{
624  7 result.append(pair(ci, OSF));
625    }
626    }
627    }catch(RFC2253ParsingException ex){
628  3 throw new RFC2253ParsingException("Quotation character (["+(OSF?APOSTROPHE_CHAR:QUOTE_CHAR)+"]) expected", ex);
629    }
630   
631  6 ci.next(); // eat the QUOTE_CHAR
632    }else{
633  52490 int spaces = -1; // it shows the index of where the bare spaces started, or -1, if no spaces encountered
634  52490 int currentPos=0; // though, it is set before the pair()
635  52490 try{
636  52489 do{
637  431669 c = ci.current();
638   
639  431670 if (c==SPACE_CHAR){
640  14228 if (spaces<0) spaces=ci.getIndex();
641  14228 ci.next();
642  14228 continue;
643    }
644   
645   
646  417442 currentPos = ci.getIndex();
647   
648  417442 if (stringchar(c, OSF)){
649  364942 ci.next(); // eat it
650    }else{
651  52500 c = pair(ci, OSF); // this is the way out
652    }
653   
654  364951 if (spaces>-1){
655  14109 int remember = ci.getIndex();
656   
657  14109 ci.setIndex(spaces);
658  14109 spaces=-1;
659    // now copy all the spaces that
660    // are to copy
661  28218 while(ci.getIndex()<currentPos){
662  14109 result.append(ci.current());
663  14109 ci.next();
664    }
665   
666  14109 ci.setIndex(remember);
667    }
668   
669  364951 result.append(c);
670   
671    }while(true);
672   
673    }catch(RFC2253ParsingException ex){
674    }
675   
676  52490 if (spaces>-1){
677  118 ci.setIndex(spaces); // forget the trailing spaces
678    }
679    }
680    }
681   
682  52496 return result.toString();
683    }
684   
685   
686    /**
687    * The quotechar terminal.
688    *
689    * @param c - the character to be tested
690    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
691    * is assumed
692    *
693    * @return true, if c is a quotechar as defined in RFC2253; false
694    * otherwise
695    */
 
696  30 toggle protected static boolean quotechar(char c, boolean OSF){
697  30 return c!=BSLASH_CHAR && !QUOTATION(c, OSF) && c!=CharacterIterator.DONE;
698    }
699   
700    /**
701    * The special terminal.
702    *
703    * @param c - the character to be tested
704    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
705    * is assumed
706    *
707    * @return true, if c is a special as defined in RFC2253; false
708    * otherwise
709    */
 
710  783994 toggle protected static boolean special(char c, boolean OSF){
711  783994 return c!=CharacterIterator.DONE && ( // semicolon is included in COMMA
712  783994 ASSIGNMENT(c) || (OSF?
713    (SLASH(c) || COMMA(c))
714    :(COMMA(c) || PLUS(c) ||
715    c==LT_CHAR || c==GT_CHAR || c==HASH_CHAR))
716    );
717    }
718   
719    /**
720    * The pair non-terminal. Note that it also allows to escape a
721    * SPACE_CHAR, to be consistent with the DN-to-string conversion rules,
722    * that say that I have to allow the last space to be escaped.
723    *
724    * <p>It may read multiple hex pairs escaped with "\" to fully decode
725    * the UTF-8 character.
726    *
727    * @param ci - the CharacterIterator, where the current position points
728    * to a character expressed through the escape character "\" and
729    * the character code
730    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
731    * is assumed
732    *
733    * @return unescaped character
734    *
735    * @throws RFC2253ParsingException, and restores the ci pointer to the
736    * position it was on input; thus acting similar to the terminals:
737    * a pointer can move over the whole entity, or it does not move at
738    * all.
739    */
 
740  52507 toggle protected static char pair(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
741  52507 int p = onePair( ci, OSF );
742  14 int i = 0x40;
743   
744  14 if (p > 0x7f) {
745  7 p &= 0x7f;
746  14 while( (p & i) != 0 )
747    {
748  7 p = ((p ^ i) << 6) | (onePair( ci, OSF ) & 0x3f);
749  7 i <<= 5;
750    }
751    }
752   
753  14 return (char)p;
754    }
755   
 
756  52514 toggle protected static int onePair(CharacterIterator ci, boolean OSF) throws RFC2253ParsingException{
757  52514 int result;
758  52514 int currentPos = ci.getIndex();
759  52514 char c;
760   
761  52514 try{
762  52514 if (ci.current()!=BSLASH_CHAR){
763  52492 throw new RFC2253ParsingException("Backslash ('"+BSLASH_CHAR+"') escape expected");
764    }
765   
766  22 result=c=ci.next();
767  22 if (OSF){
768  2 if (c==X_CHAR || c==x_CHAR){
769  1 ci.next(); // skip the 'x' or 'X'
770  1 result=Integer.parseInt( hexpair(ci), 16 );
771    }
772  20 }else if (c==BSLASH_CHAR || special(c, OSF) || QUOTATION(c, OSF)
773    || c==SPACE_CHAR){
774  2 ci.next();
775    }else{
776  18 result=Integer.parseInt( hexpair(ci), 16 );
777    }
778   
779  21 return result;
780    }catch(RFC2253ParsingException ex){
781  52493 ci.setIndex(currentPos); // restore the pointer position
782  52493 throw ex;
783    }
784    }
785   
786   
787    /**
788    * The stringchar terminal.
789    *
790    * @param c - the character to be tested
791    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
792    * is assumed
793    *
794    * @return true, if c is a stringchar as defined in RFC2253; false
795    * otherwise
796    */
 
797  796768 toggle protected static boolean stringchar(char c, boolean OSF){
798  796768 return c!=CharacterIterator.DONE &&
799    !special(c, OSF) &&
800    !QUOTATION(c, OSF) &&
801    c!=BSLASH_CHAR;
802    }
803   
804   
805    /**
806    * The hexstring non-terminal.
807    *
808    * @param ci - the CharacterIterator, where the current position points
809    * to a value expressed as a hexstring
810    *
811    * @return the hexstring as it appears in the input, prefixed with "#";
812    * the hexadecimal digits are not decoded
813    */
 
814  2 toggle protected static String hexstring(CharacterIterator ci) throws RFC2253ParsingException{
815  2 StringBuffer result = new StringBuffer("#");
816   
817  2 result.append(hexpair(ci));
818  2 try{
819  2 while(true){
820  5 result.append(hexpair(ci));
821    }
822    }catch(RFC2253ParsingException ex){
823    }
824   
825  2 return result.toString();
826    }
827   
828   
829    /**
830    * The hexpair non-terminal.
831    *
832    * @param ci - the CharacterIterator, where the current position points
833    * to a single hexadecimal digits pair
834    *
835    * @throws RFC2253ParsingException, but like pair non-terminal, restores
836    * the pointer to the initial position.
837    */
 
838  26 toggle protected static String hexpair(CharacterIterator ci) throws RFC2253ParsingException{
839  26 int currentPos = ci.getIndex();
840   
841  26 char c;
842  0 if (hexchar(c=ci.current())){
843  24 char c1;
844  0 if (hexchar(c1=ci.next())){
845  23 ci.next();
846  23 return new String( new char[]{c, c1} );
847    }
848    }
849   
850  3 ci.setIndex(currentPos);
851  3 throw new RFC2253ParsingException("Valid hexadecimal 8-bit number expected");
852    }
853   
854    /**
855    * This is a utility method to return a value, corresponding to the char
856    * value.
857    * It is private, and I am sure I will use it correctly; you should
858    * not use it at all: it does not check if the input is correct.
859    */
 
860  0 toggle private static int unhex(char c){
861  0 if (c<A_CHAR){ // it must be an ASCII '0'..'9'
862  0 return c-ZERO_CHAR;
863    }
864   
865  0 if (c<a_CHAR){ // it must be an ASCII 'A'..'F'
866  0 return c-A_CHAR+10;
867    }
868   
869  0 return c-a_CHAR+10; // here it is 'a'..'f'
870    }
871   
872   
873    /**
874    * The hexchar terminal.
875    *
876    * @param c - the character to be tested
877    *
878    * @return true, if c is a hexchar as defined in RFC2253; false
879    * otherwise
880    */
 
881  50 toggle protected static boolean hexchar(char c){
882  50 return DIGIT(c) || (c>=A_CHAR && c<=F_CHAR) || (c>='a' && c<='f');
883    }
884   
885   
886    /**
887    * The DIGIT terminal.
888    *
889    * @param c - the character to be tested
890    *
891    * @return true, if c is a digit as defined in RFC2253; false
892    * otherwise
893    */
 
894  78382 toggle protected static boolean DIGIT(char c){
895  78382 return c>=ZERO_CHAR && c<=NINE_CHAR; // 0-9
896    }
897   
898    /**
899    * The ALPHA terminal.
900    *
901    * @param c - the character to be tested
902    *
903    * @return true, if c is an alpha as defined in RFC2253; false
904    * otherwise
905    */
 
906  139170 toggle protected static boolean ALPHA(char c){
907  139170 return (c>=A_CHAR && c<=Z_CHAR) || (c>=a_CHAR && c<=z_CHAR);
908    // A-Z, a-z
909    }
910   
911    /**
912    * The QUOTATION terminal.
913    *
914    * @param c - the character to be tested
915    * @param OSF - if true, OSF syntax is assumed; if false, RFC2253 syntax
916    * is assumed
917    *
918    * @return true, if c is a quotation as defined in RFC2253 or OSF; false
919    * otherwise
920    */
 
921  796853 toggle protected static boolean QUOTATION(char c, boolean OSF){
922  796853 return c==(OSF?APOSTROPHE_CHAR:QUOTE_CHAR); // ' or "
923    }
924   
925   
926    /**
927    * The skip_spaces() non-terminal.
928    * <p><code>skip_spaces = *space</code>
929    *
930    * @param ci - the CharacterIterator, where the current position points
931    * to sequence of spaces
932    */
 
933  354700 toggle protected static void skip_spaces(CharacterIterator ci){
934  354700 ci.previous();
935  358764 while(ci.next()==SPACE_CHAR);
936    }
937   
938   
939    /**
940    * The COMMA terminal.
941    * <p><code>COMMA = "," / ";"</code>
942    *
943    * @param c - the character to be tested
944    *
945    * @return true, if c is a comma as defined in RFC2253; false
946    * otherwise
947    */
 
948  836486 toggle protected static boolean COMMA(char c){
949  836485 return c==COMMA_CHAR || c==SEMICOLON_CHAR;
950    }
951   
952   
953    /**
954    * The PLUS terminal.
955    * <p><code>PLUS = "+"</code>
956    *
957    * @param c - the character to be tested
958    *
959    * @return true, if c is a PLUS as defined in RFC2253; false
960    * otherwise
961    */
 
962  796741 toggle protected static boolean PLUS(char c){
963  796741 return c==PLUS_CHAR;
964    }
965   
966   
967    /**
968    * The ASSIGNMENT terminal.
969    * <p><code>ASSIGNMENT = "="</code>
970    *
971    * @param c - the character to be tested
972    *
973    * @return true, if c is a assignment as defined in RFC2253; false
974    * otherwise
975    */
 
976  836510 toggle protected static boolean ASSIGNMENT(char c){
977  836510 return c==ASSIGN_CHAR;
978    }
979   
980   
981    /**
982    * The SLASH terminal.
983    * <p><code>SLASH = "/"</code>
984    *
985    * @param c - the character to be tested
986    *
987    * @return true, if c is a slash as defined in RFC2253; false
988    * otherwise
989    */
 
990  59 toggle protected static boolean SLASH(char c){
991  59 return c==SLASH_CHAR;
992    }
993   
994   
995    /**
996    * This routine converts a given byte array into a hexstring, prepended
997    * with a HASH_CHAR. The array can be empty, but not null; in the
998    * latter case an IllegalArgumentException is thrown, whilst in the
999    * former case an empty string is returned (see hexstring syntax spec).
1000    *
1001    * @param b - the byte array to be converted into a hexstring
1002    *
1003    * @return a hexstring with the leading "#", or an empty string, if
1004    * b has zero length
1005    */
 
1006  0 toggle public static String toHexString(byte [] b){
1007  0 if (b==null) throw new IllegalArgumentException("Cannot convert null to hexstring");
1008   
1009  0 if (b.length==0) return "";
1010   
1011  0 StringBuffer result = new StringBuffer(HASH_CHAR);
1012   
1013  0 for (int i=0; i<b.length; i++){
1014  0 result.append(intToHex(b[i]>>4));
1015  0 result.append(intToHex(b[i]));
1016    }
1017   
1018  0 return result.toString();
1019    }
1020   
1021    /**
1022    * This method returns a character corresponding to a hexadecimal digit.
1023    *
1024    * @param a - a digit
1025    *
1026    * @return the ASCII character representing it
1027    */
 
1028  56 toggle private static char intToHex(int a){ // don't touch this; it is mine
1029  56 a&=0xf;
1030  56 return (char)((a<10)?(ZERO_CHAR+a):
1031    (a_CHAR+a-10)); // I am using lowercase; do you mind? I find it neater.
1032    }
1033   
1034   
1035    /**
1036    * This routine gets a Unicode String on input, and converts any
1037    * character, that is outside latin alphabet and numbers, to hexpair,
1038    * and escapes all special characters.
1039    *
1040    * @param s - the string to convert
1041    *
1042    * @return the string where all characters outside latin alphabet have
1043    * been escaped, including spaces and other special characters
1044    */
 
1045  52492 toggle public static String escapeString(String s){
1046  52492 char [] c = s.toCharArray();
1047  52492 StringBuffer result = new StringBuffer();
1048   
1049  431818 for (int i=0; i<c.length; i++){
1050  379326 if (!stringchar(c[i], false) || // thus we will escape ``=`` and ``#`` as well; but it says ``implementations MAY escape other symbols as well; though, this is for easier coding only, not for correct parsing
1051    (c[i]==SPACE_CHAR &&
1052    (i==0 || i==c.length-1)
1053    )
1054    ){
1055  8 result.append(BSLASH_CHAR);
1056    }
1057   
1058  379326 result.append(c[i]);
1059    }
1060   
1061  52492 return toUTF8(result.toString());
1062    }
1063   
1064   
1065    /**
1066    * This routine converts the given Unicode string (in fact, even UCS-4)
1067    * into a UTF-8 representation of it, with back-slashes where
1068    * appropriate.
1069    *
1070    * @param s - the string to be converted into UTF-8
1071    *
1072    * @return a string where the characters outside latin alphabet have
1073    * been escaped with the UTF-8 code provided
1074    *
1075    * @see RFC2279
1076    */
 
1077  52492 toggle private static String toUTF8(String s){
1078  52492 char [] c = s.toCharArray();
1079  52492 StringBuffer result = new StringBuffer();
1080   
1081  431826 for (int i=0; i<c.length; i++){
1082  379334 if (c[i]<0x80){
1083  379320 result.append(c[i]);
1084    }else{
1085  14 byte [] a = toUTF8(c[i]);
1086  42 for (int j=0; j<a.length; j++){
1087  28 result.append(BSLASH_CHAR);
1088  28 result.append(intToHex(a[j]>>4));
1089  28 result.append(intToHex(a[j]));
1090    }
1091    }
1092    }
1093   
1094  52492 return result.toString();
1095    }
1096   
1097    /**
1098    * This method returns the bytes of the UTF-8 encoding of a single
1099    * character.
1100    *
1101    * @param c - the character
1102    *
1103    * @return the byte array of the UTF-8 code for the character
1104    */
 
1105  14 toggle private static byte [] toUTF8(char c){
1106  14 byte [] b = new byte [6];
1107  14 int c_mask = 0x80; // comparison mask
1108  14 int mask = 0; // the first byte mask.
1109  14 int a = c; // even UCS4 will be less than 80000000,
1110    // thus a >> 1 is always greater than 0,
1111    // and the loop is finite.
1112  14 int i;
1113  28 for (i=0; i<b.length; i++){
1114  28 if (a<c_mask){
1115  14 b[i]=(byte)(mask | a);
1116  14 break;
1117    }
1118   
1119  14 b[i] = (byte)(0x80 | (a & 0x3f));
1120   
1121  14 if (mask == 0){
1122  14 mask = 0x80;
1123  14 c_mask >>>=1;
1124    }
1125   
1126  14 mask |= c_mask;
1127  14 c_mask >>>=1;
1128  14 a >>>=6;
1129    }
1130   
1131  14 byte [] result = new byte[ i + 1 ];
1132  42 for (int j=0; i>=0; i--, j++){
1133  28 result[j] = b[i];
1134    }
1135   
1136  14 return result;
1137    }
1138   
1139    }
1140