1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65 package org.jaxen.saxpath.base;
66
67 class XPathLexer
68 {
69 private String xpath;
70 private int currentPosition;
71 private int endPosition;
72
73 private Token previousToken;
74
75 XPathLexer(String xpath)
76 {
77 setXPath( xpath );
78 }
79
80 private void setXPath(String xpath)
81 {
82 this.xpath = xpath;
83 this.currentPosition = 0;
84 this.endPosition = xpath.length();
85 }
86
87 String getXPath()
88 {
89 return this.xpath;
90 }
91
92 Token nextToken()
93 {
94 Token token = null;
95
96 do
97 {
98 token = null;
99
100 switch ( LA(1) )
101 {
102 case '$':
103 {
104 token = dollar();
105 break;
106 }
107
108 case '"':
109 case '\'':
110 {
111 token = literal();
112 break;
113 }
114
115 case '/':
116 {
117 token = slashes();
118 break;
119 }
120
121 case ',':
122 {
123 token = comma();
124 break;
125 }
126
127 case '(':
128 {
129 token = leftParen();
130 break;
131 }
132
133 case ')':
134 {
135 token = rightParen();
136 break;
137 }
138
139 case '[':
140 {
141 token = leftBracket();
142 break;
143 }
144
145 case ']':
146 {
147 token = rightBracket();
148 break;
149 }
150
151 case '+':
152 {
153 token = plus();
154 break;
155 }
156
157 case '-':
158 {
159 token = minus();
160 break;
161 }
162
163 case '<':
164 case '>':
165 {
166 token = relationalOperator();
167 break;
168 }
169
170 case '=':
171 {
172 token = equals();
173 break;
174 }
175
176 case '!':
177 {
178 if ( LA(2) == '=' )
179 {
180 token = notEquals();
181 }
182 else
183 {
184 token = not();
185 }
186 break;
187 }
188
189 case '|':
190 {
191 token = pipe();
192 break;
193 }
194
195 case '@':
196 {
197 token = at();
198 break;
199 }
200
201 case ':':
202 {
203 if ( LA(2) == ':' )
204 {
205 token = doubleColon();
206 }
207 else
208 {
209 token = colon();
210 }
211 break;
212 }
213
214 case '*':
215 {
216 token = star();
217 break;
218 }
219
220 case '.':
221 {
222 switch ( LA(2) )
223 {
224 case '0':
225 case '1':
226 case '2':
227 case '3':
228 case '4':
229 case '5':
230 case '6':
231 case '7':
232 case '8':
233 case '9':
234 {
235 token = number();
236 break;
237 }
238 default:
239 {
240 token = dots();
241 break;
242 }
243 }
244 break;
245 }
246
247 case '0':
248 case '1':
249 case '2':
250 case '3':
251 case '4':
252 case '5':
253 case '6':
254 case '7':
255 case '8':
256 case '9':
257 {
258 token = number();
259 break;
260 }
261
262 case ' ':
263 case '\t':
264 case '\n':
265 case '\r':
266 {
267 token = whitespace();
268 break;
269 }
270
271 default:
272 {
273 if ( isIdentifierStartChar( LA(1) ) )
274 {
275 token = identifierOrOperatorName();
276 }
277 }
278 }
279
280 if ( token == null )
281 {
282 if (!hasMoreChars())
283 {
284 token = new Token( TokenTypes.EOF,
285 getXPath(),
286 currentPosition(),
287 endPosition() );
288 }
289 else
290 {
291 token = new Token( TokenTypes.ERROR,
292 getXPath(),
293 currentPosition(),
294 endPosition() );
295 }
296 }
297
298 }
299 while ( token.getTokenType() == TokenTypes.SKIP );
300
301 setPreviousToken( token );
302
303 return token;
304 }
305
306 private Token identifierOrOperatorName()
307 {
308 Token token = null;
309
310 if ( previousToken != null )
311 {
312
313
314
315
316
317
318
319
320
321
322
323 switch ( previousToken.getTokenType() )
324 {
325 case TokenTypes.AT:
326 case TokenTypes.DOUBLE_COLON:
327 case TokenTypes.LEFT_PAREN:
328 case TokenTypes.LEFT_BRACKET:
329 case TokenTypes.AND:
330 case TokenTypes.OR:
331 case TokenTypes.MOD:
332 case TokenTypes.DIV:
333 case TokenTypes.COLON:
334 case TokenTypes.SLASH:
335 case TokenTypes.DOUBLE_SLASH:
336 case TokenTypes.PIPE:
337 case TokenTypes.DOLLAR:
338 case TokenTypes.PLUS:
339 case TokenTypes.MINUS:
340 case TokenTypes.STAR:
341 case TokenTypes.COMMA:
342 case TokenTypes.LESS_THAN_SIGN:
343 case TokenTypes.GREATER_THAN_SIGN:
344 case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
345 case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
346 case TokenTypes.EQUALS:
347 case TokenTypes.NOT_EQUALS:
348 {
349 token = identifier();
350 break;
351 }
352 default:
353 {
354 token = operatorName();
355 break;
356 }
357 }
358 }
359 else
360 {
361 token = identifier();
362 }
363
364 return token;
365 }
366
367 private Token identifier()
368 {
369 Token token = null;
370
371 int start = currentPosition();
372
373 while ( hasMoreChars() )
374 {
375 if ( isIdentifierChar( LA(1) ) )
376 {
377 consume();
378 }
379 else
380 {
381 break;
382 }
383 }
384
385 token = new Token( TokenTypes.IDENTIFIER,
386 getXPath(),
387 start,
388 currentPosition() );
389
390 return token;
391 }
392
393 private Token operatorName()
394 {
395 Token token = null;
396
397 switch ( LA(1) )
398 {
399 case 'a':
400 {
401 token = and();
402 break;
403 }
404
405 case 'o':
406 {
407 token = or();
408 break;
409 }
410
411 case 'm':
412 {
413 token = mod();
414 break;
415 }
416
417 case 'd':
418 {
419 token = div();
420 break;
421 }
422 }
423
424 return token;
425 }
426
427 private Token mod()
428 {
429 Token token = null;
430
431 if ( ( LA(1) == 'm' )
432 &&
433 ( LA(2) == 'o' )
434 &&
435 ( LA(3) == 'd' )
436 )
437 {
438 token = new Token( TokenTypes.MOD,
439 getXPath(),
440 currentPosition(),
441 currentPosition()+3 );
442
443 consume();
444 consume();
445 consume();
446 }
447
448 return token;
449 }
450
451 private Token div()
452 {
453 Token token = null;
454
455 if ( ( LA(1) == 'd' )
456 &&
457 ( LA(2) == 'i' )
458 &&
459 ( LA(3) == 'v' )
460 )
461 {
462 token = new Token( TokenTypes.DIV,
463 getXPath(),
464 currentPosition(),
465 currentPosition()+3 );
466
467 consume();
468 consume();
469 consume();
470 }
471
472 return token;
473 }
474
475 private Token and()
476 {
477 Token token = null;
478
479 if ( ( LA(1) == 'a' )
480 &&
481 ( LA(2) == 'n' )
482 &&
483 ( LA(3) == 'd' )
484 )
485 {
486 token = new Token( TokenTypes.AND,
487 getXPath(),
488 currentPosition(),
489 currentPosition()+3 );
490
491 consume();
492 consume();
493 consume();
494 }
495
496 return token;
497 }
498
499 private Token or()
500 {
501 Token token = null;
502
503 if ( ( LA(1) == 'o' )
504 &&
505 ( LA(2) == 'r' )
506 )
507 {
508 token = new Token( TokenTypes.OR,
509 getXPath(),
510 currentPosition(),
511 currentPosition()+2 );
512
513 consume();
514 consume();
515 }
516
517 return token;
518 }
519
520 private Token number()
521 {
522 int start = currentPosition();
523 boolean periodAllowed = true;
524
525 loop:
526 while( true )
527 {
528 switch ( LA(1) )
529 {
530 case '.':
531 {
532 if ( periodAllowed )
533 {
534 periodAllowed = false;
535 consume();
536 }
537 else
538 {
539 break loop;
540 }
541 break;
542 }
543
544 case '0':
545 case '1':
546 case '2':
547 case '3':
548 case '4':
549 case '5':
550 case '6':
551 case '7':
552 case '8':
553 case '9':
554 {
555 consume();
556 break;
557 }
558 default:
559 {
560 break loop;
561 }
562 }
563 }
564
565 Token token = null;
566
567 if ( periodAllowed )
568 {
569 token = new Token( TokenTypes.INTEGER,
570 getXPath(),
571 start,
572 currentPosition() );
573 }
574 else
575 {
576 token = new Token( TokenTypes.DOUBLE,
577 getXPath(),
578 start,
579 currentPosition() );
580 }
581
582 return token;
583 }
584
585 private Token whitespace()
586 {
587 consume();
588
589 loop:
590 while( hasMoreChars() )
591 {
592 switch ( LA(1) )
593 {
594 case ' ':
595 case '\t':
596 case '\n':
597 case '\r':
598 {
599 consume();
600 break;
601 }
602
603 default:
604 {
605 break loop;
606 }
607 }
608 }
609
610 return new Token( TokenTypes.SKIP,
611 getXPath(),
612 0,
613 0 );
614 }
615
616 private Token comma()
617 {
618 Token token = new Token( TokenTypes.COMMA,
619 getXPath(),
620 currentPosition(),
621 currentPosition()+1 );
622
623 consume();
624
625 return token;
626 }
627
628 private Token equals()
629 {
630 Token token = new Token( TokenTypes.EQUALS,
631 getXPath(),
632 currentPosition(),
633 currentPosition()+1 );
634
635 consume();
636
637 return token;
638 }
639
640 private Token minus()
641 {
642 Token token = new Token( TokenTypes.MINUS,
643 getXPath(),
644 currentPosition(),
645 currentPosition()+1 );
646 consume();
647
648 return token;
649 }
650
651 private Token plus()
652 {
653 Token token = new Token( TokenTypes.PLUS,
654 getXPath(),
655 currentPosition(),
656 currentPosition()+1 );
657 consume();
658
659 return token;
660 }
661
662 private Token dollar()
663 {
664 Token token = new Token( TokenTypes.DOLLAR,
665 getXPath(),
666 currentPosition(),
667 currentPosition()+1 );
668 consume();
669
670 return token;
671 }
672
673 private Token pipe()
674 {
675 Token token = new Token( TokenTypes.PIPE,
676 getXPath(),
677 currentPosition(),
678 currentPosition()+1 );
679
680 consume();
681
682 return token;
683 }
684
685 private Token at()
686 {
687 Token token = new Token( TokenTypes.AT,
688 getXPath(),
689 currentPosition(),
690 currentPosition()+1 );
691
692 consume();
693
694 return token;
695 }
696
697 private Token colon()
698 {
699 Token token = new Token( TokenTypes.COLON,
700 getXPath(),
701 currentPosition(),
702 currentPosition()+1 );
703 consume();
704
705 return token;
706 }
707
708 private Token doubleColon()
709 {
710 Token token = new Token( TokenTypes.DOUBLE_COLON,
711 getXPath(),
712 currentPosition(),
713 currentPosition()+2 );
714
715 consume();
716 consume();
717
718 return token;
719 }
720
721 private Token not()
722 {
723 Token token = new Token( TokenTypes.NOT,
724 getXPath(),
725 currentPosition(),
726 currentPosition() + 1 );
727
728 consume();
729
730 return token;
731 }
732
733 private Token notEquals()
734 {
735 Token token = new Token( TokenTypes.NOT_EQUALS,
736 getXPath(),
737 currentPosition(),
738 currentPosition() + 2 );
739
740 consume();
741 consume();
742
743 return token;
744 }
745
746 private Token relationalOperator()
747 {
748 Token token = null;
749
750 switch ( LA(1) )
751 {
752 case '<':
753 {
754 if ( LA(2) == '=' )
755 {
756 token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
757 getXPath(),
758 currentPosition(),
759 currentPosition() + 2 );
760 consume();
761 }
762 else
763 {
764 token = new Token( TokenTypes.LESS_THAN_SIGN,
765 getXPath(),
766 currentPosition(),
767 currentPosition() + 1);
768 }
769
770 consume();
771 break;
772 }
773 case '>':
774 {
775 if ( LA(2) == '=' )
776 {
777 token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
778 getXPath(),
779 currentPosition(),
780 currentPosition() + 2 );
781 consume();
782 }
783 else
784 {
785 token = new Token( TokenTypes.GREATER_THAN_SIGN,
786 getXPath(),
787 currentPosition(),
788 currentPosition() + 1 );
789 }
790
791 consume();
792 break;
793 }
794 }
795
796 return token;
797
798 }
799
800 private Token star()
801 {
802 Token token = new Token( TokenTypes.STAR,
803 getXPath(),
804 currentPosition(),
805 currentPosition()+1 );
806
807 consume();
808
809 return token;
810 }
811
812 private Token literal()
813 {
814 Token token = null;
815
816 char match = LA(1);
817
818 consume();
819
820 int start = currentPosition();
821
822 while ( ( token == null )
823 &&
824 hasMoreChars() )
825 {
826 if ( LA(1) == match )
827 {
828 token = new Token( TokenTypes.LITERAL,
829 getXPath(),
830 start,
831 currentPosition() );
832 }
833 consume();
834 }
835
836 return token;
837 }
838
839 private Token dots()
840 {
841 Token token = null;
842
843 switch ( LA(2) )
844 {
845 case '.':
846 {
847 token = new Token( TokenTypes.DOT_DOT,
848 getXPath(),
849 currentPosition(),
850 currentPosition()+2 ) ;
851 consume();
852 consume();
853 break;
854 }
855 default:
856 {
857 token = new Token( TokenTypes.DOT,
858 getXPath(),
859 currentPosition(),
860 currentPosition()+1 );
861 consume();
862 break;
863 }
864 }
865
866 return token;
867 }
868
869 private Token leftBracket()
870 {
871 Token token = new Token( TokenTypes.LEFT_BRACKET,
872 getXPath(),
873 currentPosition(),
874 currentPosition()+1 );
875
876 consume();
877
878 return token;
879 }
880
881 private Token rightBracket()
882 {
883 Token token = new Token( TokenTypes.RIGHT_BRACKET,
884 getXPath(),
885 currentPosition(),
886 currentPosition()+1 );
887
888 consume();
889
890 return token;
891 }
892
893 private Token leftParen()
894 {
895 Token token = new Token( TokenTypes.LEFT_PAREN,
896 getXPath(),
897 currentPosition(),
898 currentPosition()+1 );
899
900 consume();
901
902 return token;
903 }
904
905 private Token rightParen()
906 {
907 Token token = new Token( TokenTypes.RIGHT_PAREN,
908 getXPath(),
909 currentPosition(),
910 currentPosition()+1 );
911
912 consume();
913
914 return token;
915 }
916
917 private Token slashes()
918 {
919 Token token = null;
920
921 switch ( LA(2) )
922 {
923 case '/':
924 {
925 token = new Token( TokenTypes.DOUBLE_SLASH,
926 getXPath(),
927 currentPosition(),
928 currentPosition()+2 );
929 consume();
930 consume();
931 break;
932 }
933 default:
934 {
935 token = new Token( TokenTypes.SLASH,
936 getXPath(),
937 currentPosition(),
938 currentPosition()+1 );
939 consume();
940 }
941 }
942
943 return token;
944 }
945
946 private char LA(int i)
947 {
948 if ( currentPosition + ( i - 1 ) >= endPosition() )
949 {
950 return (char) -1;
951 }
952
953 return getXPath().charAt( currentPosition() + (i - 1) );
954 }
955
956 private void consume()
957 {
958 ++this.currentPosition;
959 }
960
961 private int currentPosition()
962 {
963 return this.currentPosition;
964 }
965
966 private int endPosition()
967 {
968 return this.endPosition;
969 }
970
971 private void setPreviousToken(Token previousToken)
972 {
973 this.previousToken = previousToken;
974 }
975
976 private boolean hasMoreChars()
977 {
978 return currentPosition() < endPosition();
979 }
980
981 private boolean isIdentifierChar(char c)
982 {
983 return Verifier.isXMLNCNameCharacter( c );
984 }
985
986 private boolean isIdentifierStartChar(char c)
987 {
988 return Verifier.isXMLNCNameStartCharacter( c );
989 }
990
991 }