1:
37:
38: package ;
39: import ;
40: import ;
41: import ;
42: import ;
43: import ;
44: import ;
45: import ;
46:
47:
117:
118: public class RE extends REToken {
119:
120: private static final class IntPair implements Serializable {
121: public int first, second;
122: }
123:
124: private static final class CharUnit implements Serializable {
125: public char ch;
126: public boolean bk;
127: }
128:
129:
130: private static final String VERSION = "1.1.5-dev";
131:
132:
133: private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/java/util/regex/MessagesBundle", Locale.getDefault());
134:
135:
136:
137: private REToken firstToken, lastToken;
138:
139:
140:
141: private int numSubs;
142:
143:
144: private int minimumLength;
145: private int maximumLength;
146:
147:
151: public static final int REG_ICASE = 0x02;
152:
153:
159: public static final int REG_DOT_NEWLINE = 0x04;
160:
161:
166: public static final int REG_MULTILINE = 0x08;
167:
168:
195: public static final int REG_NOTBOL = 0x10;
196:
197:
202: public static final int REG_NOTEOL = 0x20;
203:
204:
216: public static final int REG_ANCHORINDEX = 0x40;
217:
218:
225: public static final int REG_NO_INTERPOLATE = 0x80;
226:
227:
232: public static final int REG_TRY_ENTIRE_MATCH = 0x0100;
233:
234:
242: public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
243:
244:
248: public static final int REG_X_COMMENTS = 0x0400;
249:
250:
253: public static final int REG_ICASE_USASCII = 0x0800;
254:
255:
256: public static final String version() {
257: return VERSION;
258: }
259:
260:
261: static final String getLocalizedMessage(String key) {
262: return messages.getString(key);
263: }
264:
265:
/**
 * Compiles a pattern with no compilation flags and the Perl 5 syntax.
 *
 * @param pattern a String, char[], StringBuffer, or any object whose
 *        {@code toString()} yields the pattern text
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern) throws REException
{
  this(pattern, 0, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
278:
279:
/**
 * Compiles a pattern with the given compilation flags and the Perl 5 syntax.
 *
 * @param pattern a String, char[], StringBuffer, or any object whose
 *        {@code toString()} yields the pattern text
 * @param cflags bitwise OR of {@code REG_*} compilation flags
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags) throws REException
{
  this(pattern, cflags, RESyntax.RE_SYNTAX_PERL5, 0, 0);
}
293:
294:
/**
 * Compiles a pattern with the given compilation flags and syntax.
 *
 * @param pattern a String, char[], StringBuffer, or any object whose
 *        {@code toString()} yields the pattern text
 * @param cflags bitwise OR of {@code REG_*} compilation flags
 * @param syntax the syntax table that decides which operators are active
 * @throws REException if the pattern cannot be compiled
 */
public RE(Object pattern, int cflags, RESyntax syntax) throws REException
{
  this(pattern, cflags, syntax, 0, 0);
}
309:
310:
311: private RE(REToken first, REToken last,int subs, int subIndex, int minLength, int maxLength) {
312: super(subIndex);
313: firstToken = first;
314: lastToken = last;
315: numSubs = subs;
316: minimumLength = minLength;
317: maximumLength = maxLength;
318: addToken(new RETokenEndSub(subIndex));
319: }
320:
/**
 * Compiles a (sub)pattern. All public constructors and the group
 * handling in {@code initialize} funnel into this one.
 *
 * @param patternObj the pattern text (String, char[], StringBuffer, or other object)
 * @param cflags bitwise OR of {@code REG_*} compilation flags
 * @param syntax the syntax table to compile with
 * @param myIndex subexpression index of this expression
 * @param nextSub offset added to the running group count when numbering nested groups
 * @throws REException if the pattern cannot be compiled
 */
private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub)
  throws REException
{
  super(myIndex);
  this.initialize(patternObj, cflags, syntax, myIndex, nextSub);
}
325:
326:
/**
 * Creates an uncompiled expression with subexpression index 0.
 * No pattern is parsed here; {@code initialize} is not called.
 */
protected RE()
{
  super(0);
}
328:
329:
330: protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
331: char[] pattern;
332: if (patternObj instanceof String) {
333: pattern = ((String) patternObj).toCharArray();
334: } else if (patternObj instanceof char[]) {
335: pattern = (char[]) patternObj;
336: } else if (patternObj instanceof StringBuffer) {
337: pattern = new char [((StringBuffer) patternObj).length()];
338: ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
339: } else {
340: pattern = patternObj.toString().toCharArray();
341: }
342:
343: int pLength = pattern.length;
344:
345: numSubs = 0;
346: Vector branches = null;
347:
348:
349: firstToken = lastToken = null;
350:
351:
352:
353: boolean insens = ((cflags & REG_ICASE) > 0);
354: boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
355:
356:
357:
358:
359:
360: int index = 0;
361:
362:
363: CharUnit unit = new CharUnit();
364:
365:
366: IntPair minMax = new IntPair();
367:
368:
369: REToken currentToken = null;
370: char ch;
371: boolean quot = false;
372:
373:
374: RESyntax savedSyntax = null;
375: int savedCflags = 0;
376: boolean flagsSaved = false;
377:
378: while (index < pLength) {
379:
380: index = getCharUnit(pattern,index,unit,quot);
381:
382: if (unit.bk)
383: if (unit.ch == 'Q') {
384: quot = true;
385: continue;
386: } else if (unit.ch == 'E') {
387: quot = false;
388: continue;
389: }
390: if (quot)
391: unit.bk = false;
392:
393: if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) {
394: if (Character.isWhitespace(unit.ch)) {
395: continue;
396: }
397: if (unit.ch == '#') {
398: for (int i = index; i < pLength; i++) {
399: if (pattern[i] == '\n') {
400: index = i + 1;
401: continue;
402: }
403: else if (pattern[i] == '\r') {
404: if (i + 1 < pLength && pattern[i + 1] == '\n') {
405: index = i + 2;
406: }
407: else {
408: index = i + 1;
409: }
410: continue;
411: }
412: }
413: index = pLength;
414: continue;
415: }
416: }
417:
418:
419:
420:
421:
422:
423:
424: if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
425: || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
426: && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
427:
428: addToken(currentToken);
429: RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength, maximumLength);
430: minimumLength = 0;
431: maximumLength = 0;
432: if (branches == null) {
433: branches = new Vector();
434: }
435: branches.addElement(theBranch);
436: firstToken = lastToken = currentToken = null;
437: }
438:
439:
440:
441:
442:
443:
444:
445:
446:
447:
448:
449: else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
450: int newIndex = getMinMax(pattern,index,minMax,syntax);
451: if (newIndex > index) {
452: if (minMax.first > minMax.second)
453: throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
454: if (currentToken == null)
455: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
456: if (currentToken instanceof RETokenRepeated)
457: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
458: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
459: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
460: index = newIndex;
461: currentToken = setRepeated(currentToken,minMax.first,minMax.second,index);
462: }
463: else {
464: addToken(currentToken);
465: currentToken = new RETokenChar(subIndex,unit.ch,insens);
466: if (insensUSASCII) currentToken.unicodeAware = false;
467: }
468: }
469:
470:
471:
472:
473: else if ((unit.ch == '[') && !(unit.bk || quot)) {
474:
475: ParseCharClassResult result = parseCharClass(
476: subIndex, pattern, index, pLength, cflags, syntax, 0);
477: addToken(currentToken);
478: currentToken = result.token;
479: index = result.index;
480: }
481:
482:
483:
484:
485: else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
486: boolean pure = false;
487: boolean comment = false;
488: boolean lookAhead = false;
489: boolean lookBehind = false;
490: boolean independent = false;
491: boolean negativelh = false;
492: boolean negativelb = false;
493: if ((index+1 < pLength) && (pattern[index] == '?')) {
494: switch (pattern[index+1]) {
495: case '!':
496: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
497: pure = true;
498: negativelh = true;
499: lookAhead = true;
500: index += 2;
501: }
502: break;
503: case '=':
504: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
505: pure = true;
506: lookAhead = true;
507: index += 2;
508: }
509: break;
510: case '<':
511:
512:
513: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
514: index++;
515: switch (pattern[index +1]) {
516: case '!':
517: pure = true;
518: negativelb = true;
519: lookBehind = true;
520: index += 2;
521: break;
522: case '=':
523: pure = true;
524: lookBehind = true;
525: index += 2;
526: }
527: }
528: break;
529: case '>':
530:
531:
532: if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
533: pure = true;
534: independent = true;
535: index += 2;
536: }
537: break;
538: case 'i':
539: case 'd':
540: case 'm':
541: case 's':
542: case 'u':
543: case 'x':
544: case '-':
545: if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
546:
547: int flagIndex = index + 1;
548: int endFlag = -1;
549: RESyntax newSyntax = new RESyntax(syntax);
550: int newCflags = cflags;
551: boolean negate = false;
552: while (flagIndex < pLength && endFlag < 0) {
553: switch(pattern[flagIndex]) {
554: case 'i':
555: if (negate)
556: newCflags &= ~REG_ICASE;
557: else
558: newCflags |= REG_ICASE;
559: flagIndex++;
560: break;
561: case 'd':
562: if (negate)
563: newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);
564: else
565: newSyntax.setLineSeparator("\n");
566: flagIndex++;
567: break;
568: case 'm':
569: if (negate)
570: newCflags &= ~REG_MULTILINE;
571: else
572: newCflags |= REG_MULTILINE;
573: flagIndex++;
574: break;
575: case 's':
576: if (negate)
577: newCflags &= ~REG_DOT_NEWLINE;
578: else
579: newCflags |= REG_DOT_NEWLINE;
580: flagIndex++;
581: break;
582: case 'u':
583: if (negate)
584: newCflags |= REG_ICASE_USASCII;
585: else
586: newCflags &= ~REG_ICASE_USASCII;
587: flagIndex++;
588: break;
589: case 'x':
590: if (negate)
591: newCflags &= ~REG_X_COMMENTS;
592: else
593: newCflags |= REG_X_COMMENTS;
594: flagIndex++;
595: break;
596: case '-':
597: negate = true;
598: flagIndex++;
599: break;
600: case ':':
601: case ')':
602: endFlag = pattern[flagIndex];
603: break;
604: default:
605: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
606: }
607: }
608: if (endFlag == ')') {
609: syntax = newSyntax;
610: cflags = newCflags;
611: insens = ((cflags & REG_ICASE) > 0);
612: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
613:
614: comment = true;
615: index = flagIndex - 1;
616: break;
617: }
618: if (endFlag == ':') {
619: savedSyntax = syntax;
620: savedCflags = cflags;
621: flagsSaved = true;
622: syntax = newSyntax;
623: cflags = newCflags;
624: insens = ((cflags & REG_ICASE) > 0);
625: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
626: index = flagIndex -1;
627:
628: }
629: else {
630: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
631: }
632: case ':':
633: if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
634: pure = true;
635: index += 2;
636: }
637: break;
638: case '#':
639: if (syntax.get(RESyntax.RE_COMMENTS)) {
640: comment = true;
641: }
642: break;
643: default:
644: throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
645: }
646: }
647:
648: if (index >= pLength) {
649: throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
650: }
651:
652:
653: int endIndex = index;
654: int nextIndex = index;
655: int nested = 0;
656:
657: while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
658: && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) {
659: if ((endIndex = nextIndex) >= pLength)
660: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
661: else if ((unit.ch == '[') && !(unit.bk || quot)) {
662:
663:
664: int listIndex = nextIndex;
665: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
666: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
667: int listEndIndex = -1;
668: int listNest = 0;
669: while (listIndex < pLength && listEndIndex < 0) {
670: switch(pattern[listIndex++]) {
671: case '\\':
672: listIndex++;
673: break;
674: case '[':
675:
676:
677: listNest++;
678: if (listIndex < pLength && pattern[listIndex] == '^') listIndex++;
679: if (listIndex < pLength && pattern[listIndex] == ']') listIndex++;
680: break;
681: case ']':
682: if (listNest == 0)
683: listEndIndex = listIndex;
684: listNest--;
685: break;
686: }
687: }
688: if (listEndIndex >= 0) {
689: nextIndex = listEndIndex;
690: if ((endIndex = nextIndex) >= pLength)
691: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
692: else
693: continue;
694: }
695: throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
696: }
697: else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
698: nested++;
699: else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
700: nested--;
701: }
702:
703:
704:
705:
706: if (comment) index = nextIndex;
707: else {
708:
709: addToken(currentToken);
710: if (!pure) {
711: numSubs++;
712: }
713:
714: int useIndex = (pure || lookAhead || lookBehind || independent) ?
715: 0 : nextSub + numSubs;
716: currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
717: numSubs += ((RE) currentToken).getNumSubs();
718:
719: if (lookAhead) {
720: currentToken = new RETokenLookAhead(currentToken,negativelh);
721: }
722: else if (lookBehind) {
723: currentToken = new RETokenLookBehind(currentToken,negativelb);
724: }
725: else if (independent) {
726: currentToken = new RETokenIndependent(currentToken);
727: }
728:
729: index = nextIndex;
730: if (flagsSaved) {
731: syntax = savedSyntax;
732: cflags = savedCflags;
733: insens = ((cflags & REG_ICASE) > 0);
734: insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
735: flagsSaved = false;
736: }
737: }
738: }
739:
740:
741:
742:
743: else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
744: throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
745: }
746:
747:
748:
749:
750: else if ((unit.ch == '^') && !(unit.bk || quot)) {
751: addToken(currentToken);
752: currentToken = null;
753: RETokenStart token = null;
754: if ((cflags & REG_MULTILINE) > 0) {
755: String sep = syntax.getLineSeparator();
756: if (sep == null) {
757: token = new RETokenStart(subIndex, null, true);
758: }
759: else {
760: token = new RETokenStart(subIndex, sep);
761: }
762: }
763: else {
764: token = new RETokenStart(subIndex, null);
765: }
766: addToken(token);
767: }
768:
769:
770:
771:
772: else if ((unit.ch == '$') && !(unit.bk || quot)) {
773: addToken(currentToken);
774: currentToken = null;
775: RETokenEnd token = null;
776: if ((cflags & REG_MULTILINE) > 0) {
777: String sep = syntax.getLineSeparator();
778: if (sep == null) {
779: token = new RETokenEnd(subIndex, null, true);
780: }
781: else {
782: token = new RETokenEnd(subIndex, sep);
783: }
784: }
785: else {
786: token = new RETokenEnd(subIndex, null);
787: }
788: addToken(token);
789: }
790:
791:
792:
793:
794: else if ((unit.ch == '.') && !(unit.bk || quot)) {
795: addToken(currentToken);
796: currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
797: }
798:
799:
800:
801:
802:
803:
804:
805: else if ((unit.ch == '*') && !(unit.bk || quot)) {
806: if (currentToken == null)
807: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
808: if (currentToken instanceof RETokenRepeated)
809: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
810: if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
811: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
812: currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
813: }
814:
815:
816:
817:
818:
819:
820:
821:
822: else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
823: if (currentToken == null)
824: throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
825:
826:
827: if (currentToken instanceof RETokenRepeated) {
828: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
829: if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
830: tokenRep.makePossessive();
831: else
832: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
833:
834: }
835: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
836: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
837: else
838: currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
839: }
840:
841:
842:
843:
844:
845:
846: else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
847: if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
848:
849:
850: if (currentToken instanceof RETokenRepeated) {
851: RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
852: if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
853: tokenRep.makeStingy();
854: else
855: throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
856: }
857: else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
858: throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
859: else
860: currentToken = setRepeated(currentToken,0,1,index);
861: }
862:
863:
864:
865:
866: else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) {
867: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
868: if (ce == null)
869: throw new REException("invalid octal character", REException.REG_ESCAPE, index);
870: index = index - 2 + ce.len;
871: addToken(currentToken);
872: currentToken = new RETokenChar(subIndex,ce.ch,insens);
873: if (insensUSASCII) currentToken.unicodeAware = false;
874: }
875:
876:
877:
878:
879:
880:
881:
882:
883:
884:
885:
886:
887:
888: else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
889: addToken(currentToken);
890: int numBegin = index - 1;
891: int numEnd = pLength;
892: for (int i = index; i < pLength; i++) {
893: if (! Character.isDigit(pattern[i])) {
894: numEnd = i;
895: break;
896: }
897: }
898: int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
899:
900: currentToken = new RETokenBackRef(subIndex,num,insens);
901: if (insensUSASCII) currentToken.unicodeAware = false;
902: index = numEnd;
903: }
904:
905:
906:
907:
908: else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
909: addToken(currentToken);
910: currentToken = new RETokenStart(subIndex,null);
911: }
912:
913:
914:
915:
916: else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
917: addToken(currentToken);
918: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
919: }
920:
921:
922:
923: else if (unit.bk && (unit.ch == '<')) {
924: addToken(currentToken);
925: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
926: }
927:
928:
929:
930: else if (unit.bk && (unit.ch == '>')) {
931: addToken(currentToken);
932: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
933: }
934:
935:
936:
937:
938: else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
939: addToken(currentToken);
940: currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
941: }
942:
943:
944:
945:
946:
947: else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
948: addToken(currentToken);
949: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
950: if (insensUSASCII) currentToken.unicodeAware = false;
951: }
952:
953:
954:
955:
956: else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
957: addToken(currentToken);
958: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
959: if (insensUSASCII) currentToken.unicodeAware = false;
960: }
961:
962:
963:
964:
965: else if (unit.bk && (unit.ch == 'n')) {
966: addToken(currentToken);
967: currentToken = new RETokenChar(subIndex,'\n',false);
968: }
969:
970:
971:
972:
973: else if (unit.bk && (unit.ch == 'r')) {
974: addToken(currentToken);
975: currentToken = new RETokenChar(subIndex,'\r',false);
976: }
977:
978:
979:
980:
981: else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
982: addToken(currentToken);
983: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
984: if (insensUSASCII) currentToken.unicodeAware = false;
985: }
986:
987:
988:
989:
990: else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
991: addToken(currentToken);
992: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
993: if (insensUSASCII) currentToken.unicodeAware = false;
994: }
995:
996:
997:
998:
999: else if (unit.bk && (unit.ch == 't')) {
1000: addToken(currentToken);
1001: currentToken = new RETokenChar(subIndex,'\t',false);
1002: }
1003:
1004:
1005:
1006:
1007: else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
1008: addToken(currentToken);
1009: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
1010: if (insensUSASCII) currentToken.unicodeAware = false;
1011: }
1012:
1013:
1014:
1015:
1016: else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
1017: addToken(currentToken);
1018: currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
1019: if (insensUSASCII) currentToken.unicodeAware = false;
1020: }
1021:
1022:
1023:
1024:
1025:
1026:
1027:
1028:
1029:
1030: else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') &&
1031: syntax.get(RESyntax.RE_STRING_ANCHORS)) {
1032: addToken(currentToken);
1033: currentToken = new RETokenEnd(subIndex,null);
1034: }
1035:
1036:
1037:
1038:
1039: else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) ||
1040: (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
1041: CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
1042: if (ce == null)
1043: throw new REException("invalid hex character", REException.REG_ESCAPE, index);
1044: index = index - 2 + ce.len;
1045: addToken(currentToken);
1046: currentToken = new RETokenChar(subIndex,ce.ch,insens);
1047: if (insensUSASCII) currentToken.unicodeAware = false;
1048: }
1049:
1050:
1051:
1052:
1053: else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) ||
1054: (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) {
1055: NamedProperty np = getNamedProperty(pattern, index - 2, pLength);
1056: if (np == null)
1057: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1058: index = index - 2 + np.len;
1059: addToken(currentToken);
1060: currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
1061: if (insensUSASCII) currentToken.unicodeAware = false;
1062: }
1063:
1064:
1065:
1066:
1067: else if (unit.bk && (unit.ch == 'G') &&
1068: syntax.get(RESyntax.RE_STRING_ANCHORS)) {
1069: addToken(currentToken);
1070: currentToken = new RETokenEndOfPreviousMatch(subIndex);
1071: }
1072:
1073:
1074:
1075:
1076: else {
1077: addToken(currentToken);
1078: currentToken = new RETokenChar(subIndex,unit.ch,insens);
1079: if (insensUSASCII) currentToken.unicodeAware = false;
1080: }
1081: }
1082:
1083:
1084: addToken(currentToken);
1085:
1086: if (branches != null) {
1087: branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength));
1088: branches.trimToSize();
1089: minimumLength = 0;
1090: maximumLength = 0;
1091: firstToken = lastToken = null;
1092: addToken(new RETokenOneOf(subIndex,branches,false));
1093: }
1094: else addToken(new RETokenEndSub(subIndex));
1095:
1096: }
1097:
1098: private static class ParseCharClassResult {
1099: RETokenOneOf token;
1100: int index;
1101: boolean returnAtAndOperator = false;
1102: }
1103:
1104:
1114: private static ParseCharClassResult parseCharClass(int subIndex,
1115: char[] pattern, int index,
1116: int pLength, int cflags, RESyntax syntax, int pflags)
1117: throws REException {
1118:
1119: boolean insens = ((cflags & REG_ICASE) > 0);
1120: boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
1121: Vector options = new Vector();
1122: Vector addition = new Vector();
1123: boolean additionAndAppeared = false;
1124: final int RETURN_AT_AND = 0x01;
1125: boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0);
1126: boolean negative = false;
1127: char ch;
1128:
1129: char lastChar = 0;
1130: boolean lastCharIsSet = false;
1131: if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
1132:
1133:
1134: if ((ch = pattern[index]) == '^') {
1135: negative = true;
1136: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1137: ch = pattern[index];
1138: }
1139:
1140:
1141: if (ch == ']') {
1142: lastChar = ch; lastCharIsSet = true;
1143: if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1144: }
1145:
1146: while ((ch = pattern[index++]) != ']') {
1147: if ((ch == '-') && (lastCharIsSet)) {
1148: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1149: if ((ch = pattern[index]) == ']') {
1150: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1151: if (insensUSASCII) t.unicodeAware = false;
1152: options.addElement(t);
1153: lastChar = '-';
1154: } else {
1155: if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1156: CharExpression ce = getCharExpression(pattern, index, pLength, syntax);
1157: if (ce == null)
1158: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1159: ch = ce.ch;
1160: index = index + ce.len - 1;
1161: }
1162: RETokenRange t = new RETokenRange(subIndex,lastChar,ch,insens);
1163: if (insensUSASCII) t.unicodeAware = false;
1164: options.addElement(t);
1165: lastChar = 0; lastCharIsSet = false;
1166: index++;
1167: }
1168: } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
1169: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1170: int posixID = -1;
1171: boolean negate = false;
1172: char asciiEsc = 0;
1173: boolean asciiEscIsSet = false;
1174: NamedProperty np = null;
1175: if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
1176: switch (pattern[index]) {
1177: case 'D':
1178: negate = true;
1179: case 'd':
1180: posixID = RETokenPOSIX.DIGIT;
1181: break;
1182: case 'S':
1183: negate = true;
1184: case 's':
1185: posixID = RETokenPOSIX.SPACE;
1186: break;
1187: case 'W':
1188: negate = true;
1189: case 'w':
1190: posixID = RETokenPOSIX.ALNUM;
1191: break;
1192: }
1193: }
1194: if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) {
1195: np = getNamedProperty(pattern, index - 1, pLength);
1196: if (np == null)
1197: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1198: index = index - 1 + np.len - 1;
1199: }
1200: else {
1201: CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax);
1202: if (ce == null)
1203: throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
1204: asciiEsc = ce.ch; asciiEscIsSet = true;
1205: index = index - 1 + ce.len - 1;
1206: }
1207: if (lastCharIsSet) {
1208: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1209: if (insensUSASCII) t.unicodeAware = false;
1210: options.addElement(t);
1211: }
1212:
1213: if (posixID != -1) {
1214: RETokenPOSIX t = new RETokenPOSIX(subIndex,posixID,insens,negate);
1215: if (insensUSASCII) t.unicodeAware = false;
1216: options.addElement(t);
1217: } else if (np != null) {
1218: RETokenNamedProperty t = getRETokenNamedProperty(subIndex,np,insens,index);
1219: if (insensUSASCII) t.unicodeAware = false;
1220: options.addElement(t);
1221: } else if (asciiEscIsSet) {
1222: lastChar = asciiEsc; lastCharIsSet = true;
1223: } else {
1224: lastChar = pattern[index]; lastCharIsSet = true;
1225: }
1226: ++index;
1227: } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
1228: StringBuffer posixSet = new StringBuffer();
1229: index = getPosixSet(pattern,index+1,posixSet);
1230: int posixId = RETokenPOSIX.intValue(posixSet.toString());
1231: if (posixId != -1) {
1232: RETokenPOSIX t = new RETokenPOSIX(subIndex,posixId,insens,false);
1233: if (insensUSASCII) t.unicodeAware = false;
1234: options.addElement(t);
1235: }
1236: } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
1237: ParseCharClassResult result = parseCharClass(
1238: subIndex, pattern, index, pLength, cflags, syntax, 0);
1239: addition.addElement(result.token);
1240: addition.addElement("|");
1241: index = result.index;
1242: } else if ((ch == '&') &&
1243: (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) &&
1244: (index < pLength) && (pattern[index] == '&')) {
1245: if (returnAtAndOperator) {
1246: ParseCharClassResult result = new ParseCharClassResult();
1247: options.trimToSize();
1248: if (additionAndAppeared) addition.addElement("&");
1249: if (addition.size() == 0) addition = null;
1250: result.token = new RETokenOneOf(subIndex,
1251: options, addition, negative);
1252: result.index = index - 1;
1253: result.returnAtAndOperator = true;
1254: return result;
1255: }
1256:
1257:
1258:
1259:
1260:
1261:
1262:
1263: if (additionAndAppeared) addition.addElement("&");
1264: addition.addElement(Boolean.FALSE);
1265: additionAndAppeared = true;
1266:
1267:
1268:
1269:
1270:
1271:
1272:
1273:
1274: if ((index + 1 < pLength) && (pattern[index + 1] != '[')) {
1275: ParseCharClassResult result = parseCharClass(
1276: subIndex, pattern, index+1, pLength, cflags, syntax,
1277: RETURN_AT_AND);
1278: addition.addElement(result.token);
1279: addition.addElement("|");
1280:
1281:
1282:
1283:
1284: index = (result.returnAtAndOperator ?
1285: result.index: result.index - 1);
1286: }
1287: } else {
1288: if (lastCharIsSet) {
1289: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1290: if (insensUSASCII) t.unicodeAware = false;
1291: options.addElement(t);
1292: }
1293: lastChar = ch; lastCharIsSet = true;
1294: }
1295: if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
1296: }
1297:
1298:
1299: if (lastCharIsSet) {
1300: RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
1301: if (insensUSASCII) t.unicodeAware = false;
1302: options.addElement(t);
1303: }
1304:
1305: ParseCharClassResult result = new ParseCharClassResult();
1306:
1307: options.trimToSize();
1308: if (additionAndAppeared) addition.addElement("&");
1309: if (addition.size() == 0) addition = null;
1310: result.token = new RETokenOneOf(subIndex,options, addition, negative);
1311: result.index = index;
1312: return result;
1313: }
1314:
1315: private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
1316: unit.ch = input[index++];
1317: unit.bk = (unit.ch == '\\'
1318: && (!quot || index >= input.length || input[index] == 'E'));
1319: if (unit.bk)
1320: if (index < input.length)
1321: unit.ch = input[index++];
1322: else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
1323: return index;
1324: }
1325:
1326: private static int parseInt(char[] input, int pos, int len, int radix) {
1327: int ret = 0;
1328: for (int i = pos; i < pos + len; i++) {
1329: ret = ret * radix + Character.digit(input[i], radix);
1330: }
1331: return ret;
1332: }
1333:
1334:
1341: private static class CharExpression {
1342:
1343: char ch;
1344:
1345: String expr;
1346:
1347: int len;
1348: public String toString() { return expr; }
1349: }
1350:
1351: private static CharExpression getCharExpression(char[] input, int pos, int lim,
1352: RESyntax syntax) {
1353: CharExpression ce = new CharExpression();
1354: char c = input[pos];
1355: if (c == '\\') {
1356: if (pos + 1 >= lim) return null;
1357: c = input[pos + 1];
1358: switch(c) {
1359: case 't':
1360: ce.ch = '\t';
1361: ce.len = 2;
1362: break;
1363: case 'n':
1364: ce.ch = '\n';
1365: ce.len = 2;
1366: break;
1367: case 'r':
1368: ce.ch = '\r';
1369: ce.len = 2;
1370: break;
1371: case 'x':
1372: case 'u':
1373: if ((c == 'x' && syntax.get(RESyntax.RE_HEX_CHAR)) ||
1374: (c == 'u' && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
1375: int l = 0;
1376: int expectedLength = (c == 'x' ? 2 : 4);
1377: for (int i = pos + 2; i < pos + 2 + expectedLength; i++) {
1378: if (i >= lim) break;
1379: if (!((input[i] >= '0' && input[i] <= '9') ||
1380: (input[i] >= 'A' && input[i] <= 'F') ||
1381: (input[i] >= 'a' && input[i] <= 'f')))
1382: break;
1383: l++;
1384: }
1385: if (l != expectedLength) return null;
1386: ce.ch = (char)(parseInt(input, pos + 2, l, 16));
1387: ce.len = l + 2;
1388: }
1389: else {
1390: ce.ch = c;
1391: ce.len = 2;
1392: }
1393: break;
1394: case '0':
1395: if (syntax.get(RESyntax.RE_OCTAL_CHAR)) {
1396: int l = 0;
1397: for (int i = pos + 2; i < pos + 2 + 3; i++) {
1398: if (i >= lim) break;
1399: if (input[i] < '0' || input[i] > '7') break;
1400: l++;
1401: }
1402: if (l == 3 && input[pos + 2] > '3') l--;
1403: if (l <= 0) return null;
1404: ce.ch = (char)(parseInt(input, pos + 2, l, 8));
1405: ce.len = l + 2;
1406: }
1407: else {
1408: ce.ch = c;
1409: ce.len = 2;
1410: }
1411: break;
1412: default:
1413: ce.ch = c;
1414: ce.len = 2;
1415: break;
1416: }
1417: }
1418: else {
1419: ce.ch = input[pos];
1420: ce.len = 1;
1421: }
1422: ce.expr = new String(input, pos, ce.len);
1423: return ce;
1424: }
1425:
1426:
1434: private static class NamedProperty {
1435:
1436: String name;
1437:
1438: boolean negate;
1439:
1440: int len;
1441: }
1442:
1443: private static NamedProperty getNamedProperty(char[] input, int pos, int lim) {
1444: NamedProperty np = new NamedProperty();
1445: char c = input[pos];
1446: if (c == '\\') {
1447: if (++pos >= lim) return null;
1448: c = input[pos++];
1449: switch(c) {
1450: case 'p':
1451: np.negate = false;
1452: break;
1453: case 'P':
1454: np.negate = true;
1455: break;
1456: default:
1457: return null;
1458: }
1459: c = input[pos++];
1460: if (c == '{') {
1461: int p = -1;
1462: for (int i = pos; i < lim; i++) {
1463: if (input[i] == '}') {
1464: p = i;
1465: break;
1466: }
1467: }
1468: if (p < 0) return null;
1469: int len = p - pos;
1470: np.name = new String(input, pos, len);
1471: np.len = len + 4;
1472: }
1473: else {
1474: np.name = new String(input, pos - 1, 1);
1475: np.len = 3;
1476: }
1477: return np;
1478: }
1479: else return null;
1480: }
1481:
1482: private static RETokenNamedProperty getRETokenNamedProperty(
1483: int subIndex, NamedProperty np, boolean insens, int index)
1484: throws REException {
1485: try {
1486: return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
1487: }
1488: catch (REException e) {
1489: REException ree;
1490: ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
1491: ree.initCause(e);
1492: throw ree;
1493: }
1494: }
1495:
1496:
1501: public boolean isMatch(Object input) {
1502: return isMatch(input,0,0);
1503: }
1504:
1505:
1512: public boolean isMatch(Object input,int index) {
1513: return isMatch(input,index,0);
1514: }
1515:
1516:
1517:
1525: public boolean isMatch(Object input,int index,int eflags) {
1526: return isMatchImpl(makeCharIndexed(input,index),index,eflags);
1527: }
1528:
1529: private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
1530: if (firstToken == null)
1531: return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
1532: REMatch m = new REMatch(numSubs, index, eflags);
1533: if (firstToken.match(input, m)) {
1534: if (m != null) {
1535: if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
1536: return true;
1537: }
1538: }
1539: }
1540: return false;
1541: }
1542:
1543:
1548: public int getNumSubs() {
1549: return numSubs;
1550: }
1551:
1552:
1553: void setUncle(REToken uncle) {
1554: if (lastToken != null) {
1555: lastToken.setUncle(uncle);
1556: } else super.setUncle(uncle);
1557: }
1558:
1559:
1560:
1561: boolean chain(REToken next) {
1562: super.chain(next);
1563: setUncle(next);
1564: return true;
1565: }
1566:
1567:
1571: public int getMinimumLength() {
1572: return minimumLength;
1573: }
1574:
1575: public int getMaximumLength() {
1576: return maximumLength;
1577: }
1578:
1579:
1588: public REMatch[] getAllMatches(Object input) {
1589: return getAllMatches(input,0,0);
1590: }
1591:
1592:
1603: public REMatch[] getAllMatches(Object input, int index) {
1604: return getAllMatches(input,index,0);
1605: }
1606:
1607:
1620: public REMatch[] getAllMatches(Object input, int index, int eflags) {
1621: return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
1622: }
1623:
1624:
1625: private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
1626: Vector all = new Vector();
1627: REMatch m = null;
1628: while ((m = getMatchImpl(input,index,eflags,null)) != null) {
1629: all.addElement(m);
1630: index = m.getEndIndex();
1631: if (m.end[0] == 0) {
1632: index++;
1633: input.move(1);
1634: } else {
1635: input.move(m.end[0]);
1636: }
1637: if (!input.isValid()) break;
1638: }
1639: REMatch[] mset = new REMatch[all.size()];
1640: all.copyInto(mset);
1641: return mset;
1642: }
1643:
1644:
1645: boolean match(CharIndexed input, REMatch mymatch) {
1646: if (firstToken == null) {
1647: return next(input, mymatch);
1648: }
1649:
1650:
1651: mymatch.start1[subIndex] = mymatch.index;
1652:
1653: return firstToken.match(input, mymatch);
1654: }
1655:
1656: REMatch findMatch(CharIndexed input, REMatch mymatch) {
1657: if (mymatch.backtrackStack == null)
1658: mymatch.backtrackStack = new BacktrackStack();
1659: boolean b = match(input, mymatch);
1660: if (b) {
1661: return mymatch;
1662: }
1663: return null;
1664: }
1665:
1666:
1673: public REMatch getMatch(Object input) {
1674: return getMatch(input,0,0);
1675: }
1676:
1677:
1686: public REMatch getMatch(Object input, int index) {
1687: return getMatch(input,index,0);
1688: }
1689:
1690:
1700: public REMatch getMatch(Object input, int index, int eflags) {
1701: return getMatch(input,index,eflags,null);
1702: }
1703:
1704:
1717: public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1718: return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1719: }
1720:
1721: REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1722: boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0);
1723: RE re = (tryEntireMatch ? (RE) this.clone() : this);
1724: if (tryEntireMatch) {
1725: re.chain(new RETokenEnd(0, null));
1726: }
1727:
1728: REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1729: do {
1730:
1731: if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1732: if (re.match(input, mymatch)) {
1733: REMatch best = mymatch;
1734:
1735:
1736:
1737:
1738:
1739:
1747: best.end[0] = best.index;
1748: best.finish(input);
1749: input.setLastMatch(best);
1750: return best;
1751: }
1752: }
1753: mymatch.clear(++anchor);
1754:
1755: if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1756: buffer.append(input.charAt(0));
1757: }
1758: } while (input.move(1));
1759:
1760:
1761: if (minimumLength == 0) {
1762: if (match(input, mymatch)) {
1763: mymatch.finish(input);
1764: return mymatch;
1765: }
1766: }
1767:
1768: return null;
1769: }
1770:
1771:
1778: public REMatchEnumeration getMatchEnumeration(Object input) {
1779: return getMatchEnumeration(input,0,0);
1780: }
1781:
1782:
1783:
1792: public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1793: return getMatchEnumeration(input,index,0);
1794: }
1795:
1796:
1806: public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1807: return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1808: }
1809:
1810:
1811:
1819: public String substitute(Object input,String replace) {
1820: return substitute(input,replace,0,0);
1821: }
1822:
1823:
1836: public String substitute(Object input,String replace,int index) {
1837: return substitute(input,replace,index,0);
1838: }
1839:
1840:
1853: public String substitute(Object input,String replace,int index,int eflags) {
1854: return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1855: }
1856:
1857: private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1858: StringBuffer buffer = new StringBuffer();
1859: REMatch m = getMatchImpl(input,index,eflags,buffer);
1860: if (m==null) return buffer.toString();
1861: buffer.append(getReplacement(replace, m, eflags));
1862: if (input.move(m.end[0])) {
1863: do {
1864: buffer.append(input.charAt(0));
1865: } while (input.move(1));
1866: }
1867: return buffer.toString();
1868: }
1869:
1870:
1879: public String substituteAll(Object input,String replace) {
1880: return substituteAll(input,replace,0,0);
1881: }
1882:
1883:
1897: public String substituteAll(Object input,String replace,int index) {
1898: return substituteAll(input,replace,index,0);
1899: }
1900:
1901:
1914: public String substituteAll(Object input,String replace,int index,int eflags) {
1915: return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1916: }
1917:
1918: private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1919: StringBuffer buffer = new StringBuffer();
1920: REMatch m;
1921: while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1922: buffer.append(getReplacement(replace, m, eflags));
1923: index = m.getEndIndex();
1924: if (m.end[0] == 0) {
1925: char ch = input.charAt(0);
1926: if (ch != CharIndexed.OUT_OF_BOUNDS)
1927: buffer.append(ch);
1928: input.move(1);
1929: } else {
1930: input.move(m.end[0]);
1931: }
1932:
1933: if (!input.isValid()) break;
1934: }
1935: return buffer.toString();
1936: }
1937:
1938: public static String getReplacement(String replace, REMatch m, int eflags) {
1939: if ((eflags & REG_NO_INTERPOLATE) > 0)
1940: return replace;
1941: else {
1942: if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
1943: StringBuffer sb = new StringBuffer();
1944: int l = replace.length();
1945: for (int i = 0; i < l; i++) {
1946: char c = replace.charAt(i);
1947: switch(c) {
1948: case '\\':
1949: i++;
1950:
1951: sb.append(replace.charAt(i));
1952: break;
1953: case '$':
1954: int i1 = i + 1;
1955: while (i1 < replace.length() &&
1956: Character.isDigit(replace.charAt(i1))) i1++;
1957: sb.append(m.substituteInto(replace.substring(i, i1)));
1958: i = i1 - 1;
1959: break;
1960: default:
1961: sb.append(c);
1962: }
1963: }
1964: return sb.toString();
1965: }
1966: else
1967: return m.substituteInto(replace);
1968: }
1969: }
1970:
1971:
1972: private void addToken(REToken next) {
1973: if (next == null) return;
1974: minimumLength += next.getMinimumLength();
1975: int nmax = next.getMaximumLength();
1976: if (nmax < Integer.MAX_VALUE && maximumLength < Integer.MAX_VALUE)
1977: maximumLength += nmax;
1978: else
1979: maximumLength = Integer.MAX_VALUE;
1980:
1981: if (firstToken == null) {
1982: lastToken = firstToken = next;
1983: } else {
1984:
1985:
1986: if (lastToken.chain(next)) {
1987: lastToken = next;
1988: }
1989: }
1990: }
1991:
1992: private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1993: if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1994: return new RETokenRepeated(current.subIndex,current,min,max);
1995: }
1996:
1997: private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1998:
1999:
2000: int i;
2001: for (i=index; i<(pattern.length-1); i++) {
2002: if ((pattern[i] == ':') && (pattern[i+1] == ']'))
2003: return i+2;
2004: buf.append(pattern[i]);
2005: }
2006: return index;
2007: }
2008:
2009: private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
2010:
2011:
2012: boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
2013: int startIndex = index;
2014: if (index == input.length) {
2015: if (mustMatch)
2016: throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
2017: else
2018: return startIndex;
2019: }
2020:
2021: int min,max=0;
2022: CharUnit unit = new CharUnit();
2023: StringBuffer buf = new StringBuffer();
2024:
2025:
2026: do {
2027: index = getCharUnit(input,index,unit,false);
2028: if (Character.isDigit(unit.ch))
2029: buf.append(unit.ch);
2030: } while ((index != input.length) && Character.isDigit(unit.ch));
2031:
2032:
2033: if (buf.length() == 0) {
2034: if (mustMatch)
2035: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2036: else
2037: return startIndex;
2038: }
2039:
2040: min = Integer.parseInt(buf.toString());
2041:
2042: if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
2043: max = min;
2044: else if (index == input.length)
2045: if (mustMatch)
2046: throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
2047: else
2048: return startIndex;
2049: else if ((unit.ch == ',') && !unit.bk) {
2050: buf = new StringBuffer();
2051:
2052: while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
2053: buf.append(unit.ch);
2054:
2055: if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
2056: if (mustMatch)
2057: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2058: else
2059: return startIndex;
2060:
2061:
2062: if (buf.length() == 0) max = Integer.MAX_VALUE;
2063: else max = Integer.parseInt(buf.toString());
2064: } else
2065: if (mustMatch)
2066: throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
2067: else
2068: return startIndex;
2069:
2070:
2071:
2072: minMax.first = min;
2073: minMax.second = max;
2074:
2075:
2076: return index;
2077: }
2078:
2079:
2083: public String toString() {
2084: StringBuffer sb = new StringBuffer();
2085: dump(sb);
2086: return sb.toString();
2087: }
2088:
2089: void dump(StringBuffer os) {
2090: os.append("(?#startRE subIndex=" + subIndex + ")");
2091: if (subIndex == 0)
2092: os.append("?:");
2093: if (firstToken != null)
2094: firstToken.dumpAll(os);
2095: if (subIndex == 0)
2096: os.append(")");
2097: os.append("(?#endRE subIndex=" + subIndex + ")");
2098: }
2099:
2100:
2101:
2102:
2103: public static CharIndexed makeCharIndexed(Object input, int index) {
2104:
2105:
2106:
2107:
2108:
2109:
2110: if (input instanceof CharIndexed) {
2111: CharIndexed ci = (CharIndexed) input;
2112: ci.setAnchor(index);
2113: return ci;
2114: }
2115: else if (input instanceof CharSequence)
2116: return new CharIndexedCharSequence((CharSequence) input,index);
2117: else if (input instanceof String)
2118: return new CharIndexedString((String) input,index);
2119: else if (input instanceof char[])
2120: return new CharIndexedCharArray((char[]) input,index);
2121: else if (input instanceof StringBuffer)
2122: return new CharIndexedStringBuffer((StringBuffer) input,index);
2123: else if (input instanceof InputStream)
2124: return new CharIndexedInputStream((InputStream) input,index);
2125: else
2126: return new CharIndexedString(input.toString(), index);
2127: }
2128: }