regexp: fix no leading zero octal character lexing

This commit is contained in:
Bas Leijdekkers
2016-12-20 19:22:32 +01:00
parent f7b12769f3
commit 83091d58fa
7 changed files with 691 additions and 242 deletions

View File

@@ -132,55 +132,55 @@ class _RegExLexer implements FlexLexer {
/* The ZZ_CMAP_A table has 3056 entries */
static final char ZZ_CMAP_A[] = zzUnpackCMap(
"\10\0\2\63\1\66\1\67\1\70\1\66\22\0\1\16\1\75\1\0\1\76\1\21\1\0\1\72\1\62"+
"\1\4\1\5\1\23\1\24\1\65\1\15\1\3\1\0\1\2\3\55\4\54\2\1\1\71\1\0\1\61\1\74"+
"\1\73\1\22\1\0\2\32\1\41\1\37\1\47\1\50\1\33\1\56\1\42\2\13\1\46\1\64\1\57"+
"\1\13\1\44\1\51\1\35\1\36\1\13\1\45\3\35\1\13\1\34\1\10\1\12\1\11\1\20\1\14"+
"\1\0\1\27\1\30\1\40\1\37\2\27\1\31\1\56\1\42\1\13\1\60\1\45\1\13\1\26\1\13"+
"\1\43\1\13\1\26\1\35\1\26\1\53\2\35\1\52\1\13\1\33\1\6\1\25\1\7\7\0\1\67\24"+
"\0\1\13\12\0\1\13\4\0\1\13\5\0\27\13\1\0\12\13\4\0\14\13\16\0\5\13\7\0\1\13"+
"\1\0\1\13\1\0\5\13\1\0\2\13\2\0\4\13\1\0\1\13\6\0\1\13\1\0\3\13\1\0\1\13\1"+
"\0\4\13\1\0\23\13\1\0\13\13\10\0\6\13\1\0\26\13\2\0\1\13\6\0\10\13\10\0\13"+
"\13\5\0\3\13\15\0\12\17\4\0\6\13\1\0\1\13\17\0\2\13\7\0\2\13\12\17\3\13\2"+
"\0\2\13\1\0\16\13\15\0\11\13\13\0\1\13\16\0\12\17\6\13\4\0\2\13\4\0\1\13\5"+
"\0\6\13\4\0\1\13\11\0\1\13\3\0\1\13\7\0\11\13\7\0\5\13\17\0\26\13\3\0\1\13"+
"\2\0\1\13\7\0\12\13\4\0\12\17\1\13\4\0\10\13\2\0\2\13\2\0\26\13\1\0\7\13\1"+
"\0\1\13\3\0\4\13\3\0\1\13\20\0\1\13\15\0\2\13\1\0\1\13\5\0\6\13\4\0\2\13\1"+
"\0\2\13\1\0\2\13\1\0\2\13\17\0\4\13\1\0\1\13\7\0\12\17\2\0\3\13\20\0\11\13"+
"\1\0\2\13\1\0\2\13\1\0\5\13\3\0\1\13\2\0\1\13\30\0\1\13\13\0\10\13\2\0\1\13"+
"\3\0\1\13\1\0\6\13\3\0\3\13\1\0\4\13\3\0\2\13\1\0\1\13\1\0\2\13\3\0\2\13\3"+
"\0\3\13\3\0\14\13\13\0\10\13\1\0\2\13\10\0\3\13\5\0\4\13\1\0\5\13\3\0\1\13"+
"\3\0\2\13\15\0\13\13\2\0\1\13\21\0\1\13\12\0\6\13\5\0\22\13\3\0\10\13\1\0"+
"\11\13\1\0\1\13\2\0\7\13\11\0\1\13\1\0\2\13\14\0\12\17\7\0\2\13\1\0\1\13\2"+
"\0\2\13\1\0\1\13\2\0\1\13\6\0\4\13\1\0\7\13\1\0\3\13\1\0\1\13\1\0\1\13\2\0"+
"\2\13\1\0\4\13\1\0\2\13\11\0\1\13\2\0\5\13\1\0\1\13\11\0\12\17\2\0\14\13\1"+
"\0\24\13\13\0\5\13\3\0\6\13\4\0\4\13\3\0\1\13\3\0\2\13\7\0\3\13\4\0\15\13"+
"\14\0\1\13\1\0\6\13\1\0\1\13\5\0\1\13\2\0\13\13\1\0\15\13\1\0\4\13\2\0\7\13"+
"\1\0\1\13\1\0\4\13\2\0\1\13\1\0\4\13\2\0\7\13\1\0\1\13\1\0\4\13\2\0\16\13"+
"\2\0\6\13\2\0\15\13\2\0\1\13\1\0\10\13\7\0\15\13\1\0\6\13\23\0\1\13\4\0\1"+
"\13\3\0\11\13\1\0\1\13\5\0\17\13\1\0\16\13\2\0\14\13\13\0\1\13\15\0\7\13\7"+
"\0\16\13\15\0\2\13\12\17\3\0\3\13\11\0\4\13\1\0\4\13\3\0\2\13\11\0\10\13\1"+
"\0\1\13\1\0\1\13\1\0\1\13\1\0\6\13\1\0\7\13\1\0\1\13\3\0\3\13\1\0\7\13\3\0"+
"\4\13\2\0\6\13\14\0\2\67\7\0\1\13\15\0\1\13\2\0\1\13\4\0\1\13\2\0\12\13\1"+
"\0\1\13\3\0\5\13\6\0\1\13\1\0\1\13\1\0\1\13\1\0\4\13\1\0\13\13\2\0\4\13\5"+
"\0\5\13\4\0\1\13\4\0\2\13\13\0\5\13\6\0\4\13\3\0\2\13\14\0\10\13\7\0\10\13"+
"\1\0\7\13\6\0\2\13\12\0\5\13\5\0\2\13\3\0\7\13\6\0\3\13\12\17\2\13\13\0\11"+
"\13\2\0\27\13\2\0\7\13\1\0\3\13\1\0\4\13\1\0\4\13\2\0\6\13\3\0\1\13\1\0\1"+
"\13\2\0\5\13\1\0\12\13\12\17\5\13\1\0\3\13\1\0\10\13\4\0\7\13\3\0\1\13\3\0"+
"\2\13\1\0\1\13\3\0\2\13\2\0\5\13\2\0\1\13\1\0\1\13\30\0\3\13\3\0\6\13\2\0"+
"\6\13\2\0\6\13\11\0\7\13\4\0\5\13\3\0\5\13\5\0\1\13\1\0\10\13\1\0\5\13\1\0"+
"\1\13\1\0\2\13\1\0\2\13\1\0\12\13\6\0\12\13\2\0\6\13\2\0\6\13\2\0\6\13\2\0"+
"\3\13\3\0\14\13\1\0\16\13\1\0\2\13\1\0\2\13\1\0\10\13\6\0\4\13\4\0\16\13\2"+
"\0\1\13\1\0\14\13\1\0\2\13\3\0\1\13\2\0\4\13\1\0\2\13\12\0\10\13\6\0\6\13"+
"\1\0\3\13\1\0\12\13\3\0\1\13\12\0\4\13\13\0\12\17\1\13\1\0\1\13\3\0\7\13\1"+
"\0\1\13\1\0\4\13\1\0\17\13\1\0\2\13\14\0\3\13\4\0\2\13\1\0\1\13\20\0\4\13"+
"\10\0\1\13\13\0\10\13\5\0\3\13\2\0\1\13\2\0\2\13\2\0\4\13\1\0\14\13\1\0\1"+
"\13\1\0\7\13\1\0\21\13\1\0\4\13\2\0\10\13\1\0\7\13\1\0\14\13\1\0\4\13\1\0"+
"\5\13\1\0\1\13\3\0\14\13\2\0\13\13\1\0\10\13\2\0\22\17\1\0\2\13\1\0\1\13\2"+
"\0\1\13\1\0\12\13\1\0\4\13\1\0\1\13\1\0\1\13\6\0\1\13\4\0\1\13\1\0\1\13\1"+
"\0\1\13\1\0\3\13\1\0\2\13\1\0\1\13\2\0\1\13\1\0\1\13\1\0\1\13\1\0\1\13\1\0"+
"\1\13\1\0\2\13\1\0\1\13\2\0\4\13\1\0\7\13\1\0\4\13\1\0\4\13\1\0\1\13\1\0\12"+
"\13\1\0\5\13\1\0\3\13\1\0\5\13\1\0\5\13");
"\10\0\2\63\1\66\1\67\1\70\1\66\22\0\1\14\1\75\1\0\1\76\1\17\1\0\1\72\1\62"+
"\1\2\1\3\1\21\1\22\1\65\1\13\1\1\1\0\1\53\3\55\4\54\2\47\1\71\1\0\1\61\1\74"+
"\1\73\1\20\1\0\2\30\1\37\1\35\1\45\1\46\1\31\1\56\1\40\2\11\1\44\1\64\1\57"+
"\1\11\1\42\1\50\1\33\1\34\1\11\1\43\3\33\1\11\1\32\1\6\1\10\1\7\1\16\1\12"+
"\1\0\1\25\1\26\1\36\1\35\2\25\1\27\1\56\1\40\1\11\1\60\1\43\1\11\1\24\1\11"+
"\1\41\1\11\1\24\1\33\1\24\1\52\2\33\1\51\1\11\1\31\1\4\1\23\1\5\7\0\1\67\24"+
"\0\1\11\12\0\1\11\4\0\1\11\5\0\27\11\1\0\12\11\4\0\14\11\16\0\5\11\7\0\1\11"+
"\1\0\1\11\1\0\5\11\1\0\2\11\2\0\4\11\1\0\1\11\6\0\1\11\1\0\3\11\1\0\1\11\1"+
"\0\4\11\1\0\23\11\1\0\13\11\10\0\6\11\1\0\26\11\2\0\1\11\6\0\10\11\10\0\13"+
"\11\5\0\3\11\15\0\12\15\4\0\6\11\1\0\1\11\17\0\2\11\7\0\2\11\12\15\3\11\2"+
"\0\2\11\1\0\16\11\15\0\11\11\13\0\1\11\16\0\12\15\6\11\4\0\2\11\4\0\1\11\5"+
"\0\6\11\4\0\1\11\11\0\1\11\3\0\1\11\7\0\11\11\7\0\5\11\17\0\26\11\3\0\1\11"+
"\2\0\1\11\7\0\12\11\4\0\12\15\1\11\4\0\10\11\2\0\2\11\2\0\26\11\1\0\7\11\1"+
"\0\1\11\3\0\4\11\3\0\1\11\20\0\1\11\15\0\2\11\1\0\1\11\5\0\6\11\4\0\2\11\1"+
"\0\2\11\1\0\2\11\1\0\2\11\17\0\4\11\1\0\1\11\7\0\12\15\2\0\3\11\20\0\11\11"+
"\1\0\2\11\1\0\2\11\1\0\5\11\3\0\1\11\2\0\1\11\30\0\1\11\13\0\10\11\2\0\1\11"+
"\3\0\1\11\1\0\6\11\3\0\3\11\1\0\4\11\3\0\2\11\1\0\1\11\1\0\2\11\3\0\2\11\3"+
"\0\3\11\3\0\14\11\13\0\10\11\1\0\2\11\10\0\3\11\5\0\4\11\1\0\5\11\3\0\1\11"+
"\3\0\2\11\15\0\13\11\2\0\1\11\21\0\1\11\12\0\6\11\5\0\22\11\3\0\10\11\1\0"+
"\11\11\1\0\1\11\2\0\7\11\11\0\1\11\1\0\2\11\14\0\12\15\7\0\2\11\1\0\1\11\2"+
"\0\2\11\1\0\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0\1\11\1\0\1\11\2\0"+
"\2\11\1\0\4\11\1\0\2\11\11\0\1\11\2\0\5\11\1\0\1\11\11\0\12\15\2\0\14\11\1"+
"\0\24\11\13\0\5\11\3\0\6\11\4\0\4\11\3\0\1\11\3\0\2\11\7\0\3\11\4\0\15\11"+
"\14\0\1\11\1\0\6\11\1\0\1\11\5\0\1\11\2\0\13\11\1\0\15\11\1\0\4\11\2\0\7\11"+
"\1\0\1\11\1\0\4\11\2\0\1\11\1\0\4\11\2\0\7\11\1\0\1\11\1\0\4\11\2\0\16\11"+
"\2\0\6\11\2\0\15\11\2\0\1\11\1\0\10\11\7\0\15\11\1\0\6\11\23\0\1\11\4\0\1"+
"\11\3\0\11\11\1\0\1\11\5\0\17\11\1\0\16\11\2\0\14\11\13\0\1\11\15\0\7\11\7"+
"\0\16\11\15\0\2\11\12\15\3\0\3\11\11\0\4\11\1\0\4\11\3\0\2\11\11\0\10\11\1"+
"\0\1\11\1\0\1\11\1\0\1\11\1\0\6\11\1\0\7\11\1\0\1\11\3\0\3\11\1\0\7\11\3\0"+
"\4\11\2\0\6\11\14\0\2\67\7\0\1\11\15\0\1\11\2\0\1\11\4\0\1\11\2\0\12\11\1"+
"\0\1\11\3\0\5\11\6\0\1\11\1\0\1\11\1\0\1\11\1\0\4\11\1\0\13\11\2\0\4\11\5"+
"\0\5\11\4\0\1\11\4\0\2\11\13\0\5\11\6\0\4\11\3\0\2\11\14\0\10\11\7\0\10\11"+
"\1\0\7\11\6\0\2\11\12\0\5\11\5\0\2\11\3\0\7\11\6\0\3\11\12\15\2\11\13\0\11"+
"\11\2\0\27\11\2\0\7\11\1\0\3\11\1\0\4\11\1\0\4\11\2\0\6\11\3\0\1\11\1\0\1"+
"\11\2\0\5\11\1\0\12\11\12\15\5\11\1\0\3\11\1\0\10\11\4\0\7\11\3\0\1\11\3\0"+
"\2\11\1\0\1\11\3\0\2\11\2\0\5\11\2\0\1\11\1\0\1\11\30\0\3\11\3\0\6\11\2\0"+
"\6\11\2\0\6\11\11\0\7\11\4\0\5\11\3\0\5\11\5\0\1\11\1\0\10\11\1\0\5\11\1\0"+
"\1\11\1\0\2\11\1\0\2\11\1\0\12\11\6\0\12\11\2\0\6\11\2\0\6\11\2\0\6\11\2\0"+
"\3\11\3\0\14\11\1\0\16\11\1\0\2\11\1\0\2\11\1\0\10\11\6\0\4\11\4\0\16\11\2"+
"\0\1\11\1\0\14\11\1\0\2\11\3\0\1\11\2\0\4\11\1\0\2\11\12\0\10\11\6\0\6\11"+
"\1\0\3\11\1\0\12\11\3\0\1\11\12\0\4\11\13\0\12\15\1\11\1\0\1\11\3\0\7\11\1"+
"\0\1\11\1\0\4\11\1\0\17\11\1\0\2\11\14\0\3\11\4\0\2\11\1\0\1\11\20\0\4\11"+
"\10\0\1\11\13\0\10\11\5\0\3\11\2\0\1\11\2\0\2\11\2\0\4\11\1\0\14\11\1\0\1"+
"\11\1\0\7\11\1\0\21\11\1\0\4\11\2\0\10\11\1\0\7\11\1\0\14\11\1\0\4\11\1\0"+
"\5\11\1\0\1\11\3\0\14\11\2\0\13\11\1\0\10\11\2\0\22\15\1\0\2\11\1\0\1\11\2"+
"\0\1\11\1\0\12\11\1\0\4\11\1\0\1\11\1\0\1\11\6\0\1\11\4\0\1\11\1\0\1\11\1"+
"\0\1\11\1\0\3\11\1\0\2\11\1\0\1\11\2\0\1\11\1\0\1\11\1\0\1\11\1\0\1\11\1\0"+
"\1\11\1\0\2\11\1\0\1\11\2\0\4\11\1\0\7\11\1\0\4\11\1\0\4\11\1\0\1\11\1\0\12"+
"\11\1\0\5\11\1\0\3\11\1\0\5\11\1\0\5\11");
/**
* Translates DFA states to action switch labels.
@@ -195,18 +195,18 @@ class _RegExLexer implements FlexLexer {
"\1\35\1\12\1\36\1\37\1\2\1\40\1\41\1\24"+
"\1\42\1\43\1\44\1\45\1\46\1\47\1\1\1\26"+
"\1\50\1\51\2\52\1\53\2\0\1\54\1\0\1\55"+
"\1\56\1\57\1\60\1\61\1\62\1\63\1\64\1\12"+
"\1\65\1\66\1\67\1\70\1\12\1\70\1\71\2\72"+
"\1\56\1\57\1\60\1\61\1\62\1\12\1\63\1\64"+
"\1\65\1\66\1\12\1\66\1\67\2\70\1\71\1\72"+
"\1\73\1\74\1\75\1\76\1\77\1\100\1\101\1\12"+
"\1\102\1\103\1\104\1\0\1\105\1\106\1\107\1\110"+
"\1\0\1\111\1\112\1\113\1\114\1\115\1\0\1\116"+
"\1\0\1\117\1\0\2\120\1\0\1\121\1\122\1\123"+
"\2\76\1\124\1\125\1\126\1\127\1\130\1\131\1\132"+
"\1\133\1\0\2\120\1\0\1\134\2\76\1\124\1\135"+
"\1\136\1\76\1\124\1\76\1\137\4\76";
"\1\0\1\117\2\0\1\120\1\121\1\122\1\73\2\75"+
"\1\123\2\124\1\125\1\126\1\127\1\130\1\131\1\132"+
"\1\133\2\0\1\73\2\75\1\134\1\123\2\124\1\135"+
"\1\75\1\123\1\136\1\75\1\137\4\75";
private static int [] zzUnpackAction() {
int [] result = new int[150];
int [] result = new int[152];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
@@ -240,19 +240,19 @@ class _RegExLexer implements FlexLexer {
"\0\u042f\0\u042f\0\u06e4\0\u042f\0\u04ad\0\u04ec\0\u042f\0\u042f"+
"\0\u042f\0\u0723\0\u0762\0\u042f\0\u042f\0\u07a1\0\u042f\0\u042f"+
"\0\u042f\0\u07e0\0\u081f\0\u085e\0\u089d\0\u042f\0\u08dc\0\u091b"+
"\0\u042f\0\u095a\0\u0999\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u09d8\0\u0a17\0\u042f\0\u042f\0\u0a56"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u0a95\0\u0ad4\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u095a\0\u0999\0\u042f\0\u042f\0\u09d8\0\u042f\0\u042f"+
"\0\u042f\0\u0a17\0\u042f\0\u0a56\0\u0a95\0\u0ad4\0\u042f\0\u042f"+
"\0\u0b13\0\u042f\0\u042f\0\u042f\0\u0b52\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u0b91\0\u0bd0\0\u042f\0\u042f\0\u042f\0\u042f\0\u0c0f"+
"\0\u042f\0\u0c4e\0\u042f\0\u0c8d\0\u0ccc\0\u0d0b\0\u0d4a\0\u042f"+
"\0\u042f\0\u042f\0\u0d89\0\u0dc8\0\u0e07\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u0e46\0\u0e85\0\u042f\0\u0ec4"+
"\0\u042f\0\u0f03\0\u042f\0\u0f42\0\u042f\0\u042f\0\u0f81\0\u0fc0"+
"\0\u0fff\0\u042f\0\u103e\0\u107d\0\u10bc\0\u10fb";
"\0\u042f\0\u0c4e\0\u042f\0\u0c8d\0\u0ccc\0\u042f\0\u042f\0\u042f"+
"\0\u0d0b\0\u0d4a\0\u0d89\0\u0dc8\0\u0e07\0\u0e46\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u0e85\0\u0ec4\0\u042f"+
"\0\u042f\0\u0f03\0\u042f\0\u0f42\0\u0f81\0\u042f\0\u042f\0\u0fc0"+
"\0\u0fff\0\u042f\0\u103e\0\u042f\0\u107d\0\u10bc\0\u10fb\0\u113a";
private static int [] zzUnpackRowMap() {
int [] result = new int[150];
int [] result = new int[152];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
@@ -275,88 +275,86 @@ class _RegExLexer implements FlexLexer {
private static final int [] ZZ_TRANS = zzUnpackTrans();
private static final String ZZ_TRANS_PACKED_0 =
"\3\22\1\23\1\24\1\25\1\26\1\27\1\30\1\31"+
"\1\22\1\23\1\24\1\25\1\26\1\27\1\30\1\31"+
"\1\32\3\22\1\33\1\22\1\34\1\35\1\36\1\37"+
"\1\40\1\41\35\22\1\42\2\22\1\42\1\22\1\42"+
"\5\22\1\43\12\22\1\44\64\22\7\45\1\46\3\45"+
"\1\47\4\45\1\34\5\45\26\47\2\45\3\47\3\45"+
"\1\47\13\45\2\50\4\45\1\46\7\45\1\50\34\45"+
"\2\50\7\45\1\51\11\45\11\52\1\53\1\54\53\52"+
"\3\0\26\52\1\55\45\52\3\0\6\52\6\22\1\26"+
"\1\22\1\56\1\57\1\60\2\22\1\61\45\22\1\62"+
"\2\22\1\62\1\22\1\62\1\22\1\63\4\22\6\52"+
"\1\26\1\52\1\30\1\52\1\32\5\52\1\64\45\52"+
"\1\42\1\22\1\42\6\52\6\45\1\65\1\45\1\66"+
"\1\45\1\32\21\45\1\67\1\45\1\67\2\45\1\67"+
"\2\45\1\67\1\45\1\67\10\45\1\67\4\45\1\67"+
"\20\45\1\65\1\45\1\66\1\45\1\32\64\45\5\70"+
"\1\71\5\70\1\72\1\70\1\73\10\70\26\72\2\70"+
"\3\72\3\70\1\72\4\70\1\74\5\70\66\14\1\75"+
"\10\14\13\70\1\76\12\70\26\76\2\70\3\76\3\70"+
"\1\76\6\70\1\77\16\70\1\76\12\70\26\76\2\70"+
"\3\76\1\70\1\100\1\70\1\76\17\70\1\71\5\70"+
"\1\76\12\70\26\76\2\70\3\76\3\70\1\76\13\70"+
"\2\50\2\70\1\71\5\70\1\76\3\70\1\50\6\70"+
"\26\76\2\50\3\76\3\70\1\76\12\70\13\101\1\47"+
"\4\101\1\34\5\101\26\47\2\101\3\47\3\101\1\47"+
"\4\101\1\102\5\101\121\0\1\103\55\0\2\104\14\0"+
"\1\104\34\0\2\104\7\0\1\105\22\0\1\106\1\107"+
"\5\0\1\110\56\0\1\111\1\112\1\113\3\114\1\115"+
"\1\116\1\117\1\111\1\117\1\120\1\111\1\121\1\122"+
"\1\111\1\117\5\114\2\123\1\124\1\125\3\126\3\127"+
"\1\130\2\131\2\132\3\133\1\120\1\134\1\135\1\136"+
"\2\112\1\137\1\140\1\141\2\111\1\122\1\120\1\111"+
"\1\122\1\111\1\122\6\111\47\0\1\142\30\0\2\47"+
"\1\0\2\47\5\0\5\47\6\0\33\47\3\0\1\47"+
"\13\0\2\50\14\0\1\50\34\0\2\50\32\0\1\143"+
"\76\0\1\144\1\145\5\0\1\110\50\0\1\146\5\0"+
"\1\111\1\112\1\113\3\114\1\115\1\116\3\117\1\120"+
"\1\111\1\121\1\122\1\111\1\117\5\114\2\123\1\124"+
"\1\125\3\126\3\127\1\130\2\131\2\132\3\133\1\120"+
"\1\134\1\135\1\136\2\112\1\137\1\140\1\141\2\111"+
"\1\122\1\120\1\111\1\122\1\111\1\122\6\111\72\0"+
"\1\147\17\0\1\72\12\0\26\72\2\0\3\72\3\0"+
"\1\72\25\0\1\73\12\0\26\73\2\0\3\73\3\0"+
"\1\73\13\0\2\76\10\0\5\76\6\0\33\76\3\0"+
"\1\76\23\0\1\150\71\0\1\151\37\0\1\152\14\0"+
"\1\153\1\154\6\0\1\155\1\0\1\155\1\156\1\157"+
"\1\160\1\0\2\104\4\0\1\161\7\0\1\104\34\0"+
"\2\104\7\0\1\105\12\0\2\162\14\0\1\162\34\0"+
"\2\162\32\0\1\106\76\0\1\163\1\164\65\0\2\112"+
"\51\0\2\112\23\0\1\165\51\0\1\166\1\165\27\0"+
"\1\167\151\0\1\170\1\171\14\0\77\172\1\0\2\173"+
"\3\0\1\174\20\0\2\173\1\0\1\173\4\0\3\173"+
"\5\0\2\173\3\0\2\173\22\0\2\175\24\0\2\175"+
"\1\0\1\175\4\0\3\175\5\0\2\175\3\0\2\175"+
"\102\0\1\176\1\177\25\0\1\200\146\0\1\201\12\0"+
"\1\202\76\0\1\203\1\204\1\0\5\205\1\0\71\205"+
"\1\0\2\162\4\0\1\161\7\0\1\162\34\0\2\162"+
"\32\0\1\163\67\0\1\206\51\0\2\206\23\0\1\207"+
"\51\0\2\207\52\0\1\210\46\0\2\211\24\0\2\211"+
"\1\0\1\211\4\0\3\211\5\0\2\211\3\0\2\211"+
"\22\0\2\212\4\0\1\213\17\0\2\212\1\0\1\212"+
"\4\0\3\212\5\0\2\212\3\0\2\212\22\0\2\214"+
"\24\0\2\214\1\0\1\214\4\0\3\214\5\0\2\214"+
"\3\0\2\214\21\0\5\205\1\215\71\205\2\0\1\216"+
"\51\0\2\216\30\0\1\126\70\0\2\217\4\0\1\211"+
"\17\0\2\217\1\0\1\217\4\0\3\217\5\0\2\217"+
"\3\0\2\217\22\0\2\220\24\0\2\220\1\0\1\220"+
"\4\0\3\220\5\0\2\220\3\0\2\220\22\0\2\221"+
"\4\0\1\211\17\0\2\221\1\0\1\221\4\0\3\221"+
"\5\0\2\221\3\0\2\221\22\0\2\222\24\0\2\222"+
"\1\0\1\222\4\0\3\222\5\0\2\222\3\0\2\222"+
"\22\0\2\223\4\0\1\211\17\0\2\223\1\0\1\223"+
"\4\0\3\223\5\0\2\223\3\0\2\223\22\0\2\224"+
"\4\0\1\211\17\0\2\224\1\0\1\224\4\0\3\224"+
"\5\0\2\224\3\0\2\224\22\0\2\225\4\0\1\211"+
"\17\0\2\225\1\0\1\225\4\0\3\225\5\0\2\225"+
"\3\0\2\225\22\0\2\226\4\0\1\211\17\0\2\226"+
"\1\0\1\226\4\0\3\226\5\0\2\226\3\0\2\226"+
"\22\0\2\226\4\0\1\213\17\0\2\226\1\0\1\226"+
"\4\0\3\226\5\0\2\226\3\0\2\226\21\0";
"\1\40\1\41\37\22\1\42\2\22\1\42\1\22\1\42"+
"\5\22\1\43\10\22\1\44\66\22\5\45\1\46\3\45"+
"\1\47\4\45\1\34\5\45\23\47\1\45\3\47\3\45"+
"\3\47\3\45\1\47\17\45\1\46\7\45\1\50\31\45"+
"\1\50\3\45\3\50\7\45\1\51\11\45\7\52\1\53"+
"\1\54\55\52\3\0\24\52\1\55\47\52\3\0\6\52"+
"\4\22\1\26\1\22\1\56\1\57\1\60\2\22\1\61"+
"\47\22\1\62\2\22\1\62\1\22\1\62\1\22\1\63"+
"\4\22\4\52\1\26\1\52\1\30\1\52\1\32\5\52"+
"\1\64\47\52\1\42\1\22\1\42\6\52\4\45\1\65"+
"\1\45\1\66\1\45\1\32\21\45\1\67\1\45\1\67"+
"\2\45\1\67\2\45\1\67\1\45\1\67\12\45\1\67"+
"\4\45\1\67\16\45\1\65\1\45\1\66\1\45\1\32"+
"\66\45\3\70\1\71\5\70\1\72\1\70\1\73\10\70"+
"\23\72\1\70\3\72\3\70\3\72\3\70\1\72\4\70"+
"\1\74\5\70\66\14\1\75\10\14\11\70\1\76\12\70"+
"\23\76\1\70\3\76\3\70\3\76\3\70\1\76\6\70"+
"\1\77\14\70\1\76\12\70\23\76\1\70\3\76\3\70"+
"\3\76\1\70\1\100\1\70\1\76\15\70\1\71\5\70"+
"\1\76\12\70\23\76\1\70\3\76\3\70\3\76\3\70"+
"\1\76\15\70\1\71\5\70\1\76\3\70\1\50\6\70"+
"\23\76\1\50\3\76\3\50\3\76\3\70\1\76\12\70"+
"\11\101\1\47\4\101\1\34\5\101\23\47\1\101\3\47"+
"\3\101\3\47\3\101\1\47\4\101\1\102\5\101\117\0"+
"\1\103\73\0\1\104\31\0\1\104\3\0\3\104\7\0"+
"\1\105\20\0\1\106\1\107\5\0\1\110\60\0\1\111"+
"\3\112\1\113\1\114\1\115\1\111\1\115\1\116\1\111"+
"\1\117\1\120\1\111\1\115\5\112\2\121\1\122\1\123"+
"\3\124\3\125\1\126\2\127\2\130\3\131\1\116\1\132"+
"\1\133\1\134\1\135\1\136\2\132\1\137\1\140\1\141"+
"\2\111\1\120\1\116\1\111\1\120\1\111\1\120\6\111"+
"\45\0\1\142\33\0\2\47\5\0\5\47\6\0\35\47"+
"\3\0\1\47\27\0\1\50\31\0\1\50\3\0\3\50"+
"\30\0\1\143\76\0\1\144\1\145\5\0\1\110\52\0"+
"\1\146\5\0\1\111\3\112\1\113\1\114\3\115\1\116"+
"\1\111\1\117\1\120\1\111\1\115\5\112\2\121\1\122"+
"\1\123\3\124\3\125\1\126\2\127\2\130\3\131\1\116"+
"\1\132\1\133\1\134\1\135\1\136\2\132\1\137\1\140"+
"\1\141\2\111\1\120\1\116\1\111\1\120\1\111\1\120"+
"\6\111\72\0\1\147\15\0\1\72\12\0\23\72\1\0"+
"\3\72\3\0\3\72\3\0\1\72\23\0\1\73\12\0"+
"\23\73\1\0\3\73\3\0\3\73\3\0\1\73\23\0"+
"\5\76\6\0\35\76\3\0\1\76\21\0\1\150\71\0"+
"\1\151\37\0\1\152\16\0\1\153\1\154\6\0\1\155"+
"\1\0\1\155\1\156\1\157\1\160\5\0\1\161\7\0"+
"\1\104\31\0\1\104\3\0\3\104\7\0\1\105\26\0"+
"\1\162\31\0\1\162\3\0\3\162\30\0\1\106\76\0"+
"\1\163\1\164\72\0\1\165\153\0\1\166\1\167\14\0"+
"\77\170\47\0\1\171\3\0\3\171\25\0\1\172\20\0"+
"\2\173\1\0\1\173\4\0\3\173\5\0\3\173\3\0"+
"\3\173\46\0\2\174\1\0\1\174\4\0\3\174\5\0"+
"\3\174\3\0\3\174\74\0\1\175\1\176\1\175\102\0"+
"\1\177\1\200\23\0\1\201\150\0\1\202\12\0\1\203"+
"\76\0\1\204\1\205\1\0\3\206\1\0\73\206\5\0"+
"\1\161\7\0\1\162\31\0\1\162\3\0\3\162\30\0"+
"\1\163\116\0\1\207\116\0\1\210\3\0\3\210\26\0"+
"\1\211\17\0\2\212\1\0\1\212\4\0\3\212\5\0"+
"\3\212\3\0\3\212\46\0\2\213\1\0\1\213\4\0"+
"\3\213\5\0\3\213\3\0\3\213\46\0\2\214\1\0"+
"\1\214\4\0\3\214\5\0\3\214\3\0\3\214\74\0"+
"\3\215\74\0\3\216\21\0\3\206\1\217\73\206\5\0"+
"\1\124\76\0\1\213\17\0\2\220\1\0\1\220\4\0"+
"\3\220\5\0\3\220\3\0\3\220\46\0\2\221\1\0"+
"\1\221\4\0\3\221\5\0\3\221\3\0\3\221\74\0"+
"\3\222\26\0\1\213\17\0\2\223\1\0\1\223\4\0"+
"\3\223\5\0\3\223\3\0\3\223\46\0\2\224\1\0"+
"\1\224\4\0\3\224\5\0\3\224\3\0\3\224\26\0"+
"\1\213\17\0\2\225\1\0\1\225\4\0\3\225\5\0"+
"\3\225\3\0\3\225\26\0\1\213\17\0\2\226\1\0"+
"\1\226\4\0\3\226\5\0\3\226\3\0\3\226\26\0"+
"\1\213\17\0\2\227\1\0\1\227\4\0\3\227\5\0"+
"\3\227\3\0\3\227\26\0\1\213\17\0\2\230\1\0"+
"\1\230\4\0\3\230\5\0\3\230\3\0\3\230\26\0"+
"\1\211\17\0\2\230\1\0\1\230\4\0\3\230\5\0"+
"\3\230\3\0\3\230\21\0";
private static int [] zzUnpackTrans() {
int [] result = new int[4410];
int [] result = new int[4473];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
@@ -398,15 +396,14 @@ class _RegExLexer implements FlexLexer {
"\1\1\1\11\1\1\11\11\1\1\2\11\2\1\3\11"+
"\1\1\1\11\1\1\1\11\1\1\2\11\1\1\1\11"+
"\2\1\3\11\2\1\2\11\1\1\3\11\2\1\2\0"+
"\1\11\1\0\1\1\1\11\2\1\10\11\2\1\2\11"+
"\1\1\4\11\2\1\2\11\1\1\3\11\1\0\4\11"+
"\1\0\1\1\4\11\1\0\1\11\1\0\1\11\1\0"+
"\2\1\1\0\3\11\3\1\7\11\1\0\1\1\1\11"+
"\1\0\1\11\1\1\1\11\1\1\2\11\3\1\1\11"+
"\4\1";
"\1\11\1\0\1\1\11\11\2\1\2\11\1\1\3\11"+
"\1\1\1\11\3\1\2\11\1\1\3\11\1\0\4\11"+
"\1\0\1\1\4\11\1\0\1\11\1\0\1\11\2\0"+
"\3\11\6\1\7\11\2\0\2\11\1\1\1\11\2\1"+
"\2\11\2\1\1\11\1\1\1\11\4\1";
private static int [] zzUnpackAttribute() {
int [] result = new int[150];
int [] result = new int[152];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
@@ -479,6 +476,8 @@ class _RegExLexer implements FlexLexer {
private boolean allowHorizontalWhitespaceClass;
private boolean allowPosixBracketExpressions;
private boolean allowTransformationEscapes;
private int maxOctal = 0777;
private int minOctalDigits = 1;
_RegExLexer(EnumSet<RegExpCapability> capabilities) {
this((java.io.Reader)null);
@@ -493,6 +492,18 @@ class _RegExLexer implements FlexLexer {
this.allowEmptyCharacterClass = capabilities.contains(RegExpCapability.ALLOW_EMPTY_CHARACTER_CLASS);
this.allowPosixBracketExpressions = capabilities.contains(RegExpCapability.POSIX_BRACKET_EXPRESSIONS);
this.allowTransformationEscapes = capabilities.contains(RegExpCapability.TRANSFORMATION_ESCAPES);
if (capabilities.contains(RegExpCapability.MAX_OCTAL_177)) {
maxOctal = 0177;
}
else if (capabilities.contains(RegExpCapability.MAX_OCTAL_377)) {
maxOctal = 0377;
}
if (capabilities.contains(RegExpCapability.MIN_OCTAL_2_DIGITS)) {
minOctalDigits = 2;
}
else if (capabilities.contains(RegExpCapability.MIN_OCTAL_3_DIGITS)) {
minOctalDigits = 3;
}
}
private void yypushstate(int state) {
@@ -957,89 +968,110 @@ class _RegExLexer implements FlexLexer {
}
case 141: break;
case 47:
{ if (allowOctalNoLeadingZero) {
CharSequence s = yytext();
int i = 1;
for (; i < s.length(); i++) {
if (s.charAt(i) > '7') break;
}
if (i > 1) {
yypushback(yylength() - i);
return RegExpTT.OCT_CHAR;
}
}
if (yystate() == CLASS2) {
yypushback(yylength() - 2);
return RegExpTT.REDUNDANT_ESCAPE;
}
while (yylength() > 2 && Integer.parseInt(yytext().toString().substring(1)) > capturingGroupCount) {
yypushback(1);
}
return RegExpTT.BACKREF;
{ return (yystate() == CLASS2) ? RegExpTT.REDUNDANT_ESCAPE : RegExpTT.ESC_CHARACTER;
}
case 142: break;
case 48:
{ return (allowOctalNoLeadingZero ? RegExpTT.OCT_CHAR : RegExpTT.BAD_OCT_VALUE);
{ return (allowDanglingMetacharacters != Boolean.TRUE && yystate() != CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
}
case 143: break;
case 49:
{ return (yystate() == CLASS2) ? RegExpTT.REDUNDANT_ESCAPE : RegExpTT.ESC_CHARACTER;
{ return (allowDanglingMetacharacters == Boolean.FALSE && yystate() != CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
}
case 144: break;
case 50:
{ return (allowDanglingMetacharacters != Boolean.TRUE && yystate() != CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
{ return RegExpTT.ESC_CHARACTER;
}
case 145: break;
case 51:
{ return (allowDanglingMetacharacters == Boolean.FALSE && yystate() != CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
{ return (yystate() == CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
}
case 146: break;
case 52:
{ return RegExpTT.ESC_CHARACTER;
{ return commentMode ? RegExpTT.CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
}
case 147: break;
case 53:
{ return (yystate() == CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
{ return RegExpTT.ESC_CTRL_CHARACTER;
}
case 148: break;
case 54:
{ return commentMode ? RegExpTT.CHARACTER : RegExpTT.REDUNDANT_ESCAPE;
{ return yystate() != CLASS2 ? RegExpTT.BOUNDARY : RegExpTT.ESC_CHARACTER;
}
case 149: break;
case 55:
{ return RegExpTT.ESC_CTRL_CHARACTER;
{ return RegExpTT.CHAR_CLASS;
}
case 150: break;
case 56:
{ return yystate() != CLASS2 ? RegExpTT.BOUNDARY : RegExpTT.ESC_CHARACTER;
{ if (xmlSchemaMode) return RegExpTT.CHAR_CLASS; else return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
case 151: break;
case 57:
{ return RegExpTT.CHAR_CLASS;
{ yypushstate(PROP); return RegExpTT.PROPERTY;
}
case 152: break;
case 58:
{ if (xmlSchemaMode) return RegExpTT.CHAR_CLASS; else return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
{ return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
case 153: break;
case 59:
{ yypushstate(PROP); return RegExpTT.PROPERTY;
{ String text = yytext().toString().substring(1);
if (allowOctalNoLeadingZero) {
if (Integer.parseInt(text) <= capturingGroupCount && yystate() != CLASS2) return RegExpTT.BACKREF;
int i = 0;
int value = 0;
for (; i < text.length(); i++) {
char c = text.charAt(i);
if (c > '7') break;
value = value * 8 + (c - '0');
}
if (i > 0) {
yypushback(text.length() - i);
if (value > maxOctal) {
yypushback(1);
return RegExpTT.BAD_OCT_VALUE;
}
if (minOctalDigits > i && yystate() != CLASS2) {
return RegExpTT.BAD_OCT_VALUE;
}
return RegExpTT.OCT_CHAR;
}
return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
else {
if (yystate() == CLASS2) {
yypushback(yylength() - 2);
return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
/* java.util.regex.Pattern says about backrefs:
"In this class, \1 through \9 are always interpreted as back references,
and a larger number is accepted as a back reference if at least that many
subexpressions exist at that point in the regular expression, otherwise the
parser will drop digits until the number is smaller or equal to the existing
number of groups or it is one digit."
*/
while (yylength() > 2 && Integer.parseInt(yytext().toString().substring(1)) > capturingGroupCount) {
yypushback(1);
}
return RegExpTT.BACKREF;
}
}
case 154: break;
case 60:
{ return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
{ yypushstate(QUOTED); return RegExpTT.QUOTE_BEGIN;
}
case 155: break;
case 61:
{ yypushstate(QUOTED); return RegExpTT.QUOTE_BEGIN;
{ return RegExpTT.BAD_HEX_VALUE;
}
case 156: break;
case 62:
{ return RegExpTT.BAD_HEX_VALUE;
{ return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
}
case 157: break;
case 63:
{ return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
{ return (allowOctalNoLeadingZero ? RegExpTT.OCT_CHAR : RegExpTT.BAD_OCT_VALUE);
}
case 158: break;
case 64:
@@ -1127,23 +1159,23 @@ class _RegExLexer implements FlexLexer {
}
case 174: break;
case 80:
{ return RegExpTT.OCT_CHAR;
{ yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_CALL;
}
case 175: break;
case 81:
{ yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_CALL;
{ yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_CALL;
}
case 176: break;
case 82:
{ yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_CALL;
{ if (xmlSchemaMode) { yypushback(1); return RegExpTT.CHAR_CLASS; } else return RegExpTT.CTRL;
}
case 177: break;
case 83:
{ if (xmlSchemaMode) { yypushback(1); return RegExpTT.CHAR_CLASS; } else return RegExpTT.CTRL;
{ return StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
}
case 178: break;
case 84:
{ return StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN;
{ return RegExpTT.OCT_CHAR;
}
case 179: break;
case 85:

View File

@@ -74,5 +74,25 @@ public enum RegExpCapability {
/**
* \\u, \l, \\U, \L, and \E
*/
TRANSFORMATION_ESCAPES
TRANSFORMATION_ESCAPES,
/**
* \\177 (decimal 127) is the maximum valid octal character
*/
MAX_OCTAL_177,
/**
* \\377 (decimal 255) is the maximum valid octal character
*/
MAX_OCTAL_377,
/**
* At least 2 digits are needed for an octal escape outside a character class to be valid (as in Ruby regular expressions)
*/
MIN_OCTAL_2_DIGITS,
/**
* At least 3 digits are needed for an octal escape outside a character class to be valid (as in Python regular expressions)
*/
MIN_OCTAL_3_DIGITS,
}

View File

@@ -35,6 +35,8 @@ import java.util.EnumSet;
private boolean allowHorizontalWhitespaceClass;
private boolean allowPosixBracketExpressions;
private boolean allowTransformationEscapes;
private int maxOctal = 0777;
private int minOctalDigits = 1;
_RegExLexer(EnumSet<RegExpCapability> capabilities) {
this((java.io.Reader)null);
@@ -49,6 +51,18 @@ import java.util.EnumSet;
this.allowEmptyCharacterClass = capabilities.contains(RegExpCapability.ALLOW_EMPTY_CHARACTER_CLASS);
this.allowPosixBracketExpressions = capabilities.contains(RegExpCapability.POSIX_BRACKET_EXPRESSIONS);
this.allowTransformationEscapes = capabilities.contains(RegExpCapability.TRANSFORMATION_ESCAPES);
if (capabilities.contains(RegExpCapability.MAX_OCTAL_177)) {
maxOctal = 0177;
}
else if (capabilities.contains(RegExpCapability.MAX_OCTAL_377)) {
maxOctal = 0377;
}
if (capabilities.contains(RegExpCapability.MIN_OCTAL_2_DIGITS)) {
minOctalDigits = 2;
}
else if (capabilities.contains(RegExpCapability.MIN_OCTAL_3_DIGITS)) {
minOctalDigits = 3;
}
}
private void yypushstate(int state) {
@@ -90,8 +104,6 @@ import java.util.EnumSet;
%xstate PY_COND_REF
%xstate BRACKET_EXPRESSION
DIGITS=[1-9][0-9]*
DOT="."
LPAREN="("
RPAREN=")"
@@ -151,32 +163,47 @@ HEX_CHAR=[0-9a-fA-F]
{ESCAPE} {XML_CLASS} { if (xmlSchemaMode) return RegExpTT.CHAR_CLASS; else return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
/* java.util.regex.Pattern says about backrefs:
"In this class, \1 through \9 are always interpreted as back references,
and a larger number is accepted as a back reference if at least that many
subexpressions exist at that point in the regular expression, otherwise the
parser will drop digits until the number is smaller or equal to the existing
number of groups or it is one digit."
*/
{ESCAPE} {DIGITS} { if (allowOctalNoLeadingZero) {
CharSequence s = yytext();
int i = 1;
for (; i < s.length(); i++) {
if (s.charAt(i) > '7') break;
/* 999 back references should be enough for everybody */
{ESCAPE} [1-9][0-9]{0,2} { String text = yytext().toString().substring(1);
if (allowOctalNoLeadingZero) {
if (Integer.parseInt(text) <= capturingGroupCount && yystate() != CLASS2) return RegExpTT.BACKREF;
int i = 0;
int value = 0;
for (; i < text.length(); i++) {
char c = text.charAt(i);
if (c > '7') break;
value = value * 8 + (c - '0');
}
if (i > 1) {
yypushback(yylength() - i);
if (i > 0) {
yypushback(text.length() - i);
if (value > maxOctal) {
yypushback(1);
return RegExpTT.BAD_OCT_VALUE;
}
if (minOctalDigits > i && yystate() != CLASS2) {
return RegExpTT.BAD_OCT_VALUE;
}
return RegExpTT.OCT_CHAR;
}
return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
if (yystate() == CLASS2) {
yypushback(yylength() - 2);
return RegExpTT.REDUNDANT_ESCAPE;
else {
if (yystate() == CLASS2) {
yypushback(yylength() - 2);
return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN;
}
/* java.util.regex.Pattern says about backrefs:
"In this class, \1 through \9 are always interpreted as back references,
and a larger number is accepted as a back reference if at least that many
subexpressions exist at that point in the regular expression, otherwise the
parser will drop digits until the number is smaller or equal to the existing
number of groups or it is one digit."
*/
while (yylength() > 2 && Integer.parseInt(yytext().toString().substring(1)) > capturingGroupCount) {
yypushback(1);
}
return RegExpTT.BACKREF;
}
while (yylength() > 2 && Integer.parseInt(yytext().toString().substring(1)) > capturingGroupCount) {
yypushback(1);
}
return RegExpTT.BACKREF;
}
{ESCAPE} "-" { return (yystate() == CLASS2) ? RegExpTT.ESC_CHARACTER : RegExpTT.REDUNDANT_ESCAPE; }

View File

@@ -20,6 +20,8 @@ import com.intellij.testFramework.LexerTestCase;
import java.util.EnumSet;
import static org.intellij.lang.regexp.RegExpCapability.*;
/**
* @author Bas Leijdekkers
*/
@@ -44,8 +46,8 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testEditorReplacement() {
RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.TRANSFORMATION_ESCAPES));
String text = "\\U$1\\E\\u$3\\l$4\\L$2\\E";
RegExpLexer lexer = new RegExpLexer(EnumSet.of(TRANSFORMATION_ESCAPES));
final String text = "\\U$1\\E\\u$3\\l$4\\L$2\\E";
doTest(text, "CHAR_CLASS ('\\U')\n" +
"DOLLAR ('$')\n" +
"CHARACTER ('1')\n" +
@@ -80,7 +82,7 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testIntersection() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.NESTED_CHARACTER_CLASSES));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(NESTED_CHARACTER_CLASSES));
doTest("[a&&]", "CLASS_BEGIN ('[')\n" +
"CHARACTER ('a')\n" +
"ANDAND ('&&')\n" +
@@ -98,7 +100,7 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testPosixBracketExpression() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.POSIX_BRACKET_EXPRESSIONS));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(POSIX_BRACKET_EXPRESSIONS));
doTest("[[:xdigit:]]", "CLASS_BEGIN ('[')\n" +
"BRACKET_EXPRESSION_BEGIN ('[:')\n" +
"NAME ('xdigit')\n" +
@@ -107,7 +109,7 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testNegatedPosixBracketExpression() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.POSIX_BRACKET_EXPRESSIONS));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(POSIX_BRACKET_EXPRESSIONS));
doTest("[[:^xdigit:]]", "CLASS_BEGIN ('[')\n" +
"BRACKET_EXPRESSION_BEGIN ('[:')\n" +
"CARET ('^')\n" +
@@ -116,17 +118,142 @@ public class RegExpLexerTest extends LexerTestCase {
"CLASS_END (']')", lexer);
}
public void testOctalWithoutLeadingZero() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.OCTAL_NO_LEADING_ZERO));
doTest("\\0\\123[\\123]", "OCT_CHAR ('\\0')\n" +
"OCT_CHAR ('\\123')\n" +
"CLASS_BEGIN ('[')\n" +
"OCT_CHAR ('\\123')\n" +
"CLASS_END (']')", lexer);
/**
 * Ruby dialect: \177 is the largest valid octal escape. With MAX_OCTAL_177 enabled,
 * \200 is expected to lex as the bad octal value \20 followed by a literal '0'.
 */
public void testMaxOctalNoLeadingZero1() {
  final String expectedTokens = "OCT_CHAR ('\\177')\n" +
                                "BAD_OCT_VALUE ('\\20')\n" +
                                "CHARACTER ('0')";
  doTest("\\177\\200", expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO, MAX_OCTAL_177)));
}
/**
 * JavaScript dialect: \377 is the largest valid octal escape, so \400 is read as \40 followed by a
 * literal '0'. The resulting BAD_OCT_VALUE token is converted to OCT_CHAR in
 * com.intellij.lang.javascript.inject.JSRegexpParserDefinition.
 */
public void testMaxOctalNoLeadingZero2() {
  final String pattern = "\\177\\200\\377\\400";
  final String expectedTokens = "OCT_CHAR ('\\177')\n" +
                                "OCT_CHAR ('\\200')\n" +
                                "OCT_CHAR ('\\377')\n" +
                                "BAD_OCT_VALUE ('\\40')\n" +
                                "CHARACTER ('0')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO, MAX_OCTAL_377)));
}
/**
 * Python dialect: no upper bound is configured, so every three digit escape up to \777
 * is expected to lex as a single octal character.
 */
public void testMaxOctalNoLeadingZero3() {
  final String pattern = "\\177\\200\\377\\400\\777";
  final String expectedTokens = "OCT_CHAR ('\\177')\n" +
                                "OCT_CHAR ('\\200')\n" +
                                "OCT_CHAR ('\\377')\n" +
                                "OCT_CHAR ('\\400')\n" +
                                "OCT_CHAR ('\\777')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO)));
}
/**
 * JavaScript dialect: \1 and \11 are valid octal characters both outside and inside a
 * character class. Once a group has been opened, \1 is expected to lex as a back reference.
 */
public void testOctalNoLeadingZero1() {
  final String pattern = "\\1()\\1\\11[\\1\\11]";
  final String expectedTokens = "OCT_CHAR ('\\1')\n" +
                                "GROUP_BEGIN ('(')\n" +
                                "GROUP_END (')')\n" +
                                "BACKREF ('\\1')\n" +
                                "OCT_CHAR ('\\11')\n" +
                                "CLASS_BEGIN ('[')\n" +
                                "OCT_CHAR ('\\1')\n" +
                                "OCT_CHAR ('\\11')\n" +
                                "CLASS_END (']')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO)));
}
/**
 * Ruby dialect (MIN_OCTAL_2_DIGITS): outside a character class a lone \1 is a bad octal value,
 * while \11 is a valid octal character. Inside a character class both are octal characters.
 */
public void testOctalNoLeadingZero2() {
  final String pattern = "\\1()\\1\\11[\\1\\11]";
  final String expectedTokens = "BAD_OCT_VALUE ('\\1')\n" +
                                "GROUP_BEGIN ('(')\n" +
                                "GROUP_END (')')\n" +
                                "BACKREF ('\\1')\n" +
                                "OCT_CHAR ('\\11')\n" +
                                "CLASS_BEGIN ('[')\n" +
                                "OCT_CHAR ('\\1')\n" +
                                "OCT_CHAR ('\\11')\n" +
                                "CLASS_END (']')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO, MIN_OCTAL_2_DIGITS)));
}
/**
 * Python dialect (MIN_OCTAL_3_DIGITS): outside a character class \1 and \11 are bad octal values
 * and only \111 is a valid octal character. Inside a character class all three are octal characters.
 */
public void testOctalNoLeadingZero3() {
  final String pattern = "\\1()\\1\\11\\111[\\1\\11\\111]";
  final String expectedTokens = "BAD_OCT_VALUE ('\\1')\n" +
                                "GROUP_BEGIN ('(')\n" +
                                "GROUP_END (')')\n" +
                                "BACKREF ('\\1')\n" +
                                "BAD_OCT_VALUE ('\\11')\n" +
                                "OCT_CHAR ('\\111')\n" +
                                "CLASS_BEGIN ('[')\n" +
                                "OCT_CHAR ('\\1')\n" +
                                "OCT_CHAR ('\\11')\n" +
                                "OCT_CHAR ('\\111')\n" +
                                "CLASS_END (']')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO, MIN_OCTAL_3_DIGITS)));
}
/**
 * Inside a character class an octal escape is never a back reference, even when enough groups
 * precede it (valid under js, ruby, python). Ten empty groups are followed by [\1\10\100].
 */
public void testOctalInsideCharClass() {
  final int groupCount = 10;
  final StringBuilder pattern = new StringBuilder();
  final StringBuilder expectedTokens = new StringBuilder();
  // One empty group in the pattern corresponds to one GROUP_BEGIN/GROUP_END pair in the token dump.
  for (int i = 0; i < groupCount; i++) {
    pattern.append("()");
    expectedTokens.append("GROUP_BEGIN ('(')\nGROUP_END (')')\n");
  }
  pattern.append("[\\1\\10\\100]");
  expectedTokens.append("CLASS_BEGIN ('[')\n")
                .append("OCT_CHAR ('\\1')\n")
                .append("OCT_CHAR ('\\10')\n")
                .append("OCT_CHAR ('\\100')\n")
                .append("CLASS_END (']')");
  final RegExpLexer lexer = new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO));
  doTest(pattern.toString(), expectedTokens.toString(), lexer);
}
/**
 * \0 is always a valid octal character under the js, ruby and python regex dialects and is
 * never a back reference: before a group, after a group, and inside a character class.
 */
public void testZeroOctalNoLeadingZero() {
  final String pattern = "\\0()\\0[\\0]";
  final String expectedTokens = "OCT_CHAR ('\\0')\n" +
                                "GROUP_BEGIN ('(')\n" +
                                "GROUP_END (')')\n" +
                                "OCT_CHAR ('\\0')\n" +
                                "CLASS_BEGIN ('[')\n" +
                                "OCT_CHAR ('\\0')\n" +
                                "CLASS_END (']')";
  doTest(pattern, expectedTokens, new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO)));
}
/**
 * A three digit escape (\100) is always valid under js, ruby and python: it is either an octal
 * character or a back reference. The pattern is \100, then one hundred empty groups, then
 * \100[\100].
 * <p>
 * No expected token text is inlined (null is passed to doTest).
 * NOTE(review): presumably the base class then reads the expectation from the test data directory - confirm.
 */
public void testThreeDigitOctalNoLeadingZero() {
  final int groupCount = 100;
  final StringBuilder pattern = new StringBuilder("\\100");
  for (int i = 0; i < groupCount; i++) {
    pattern.append("()");
  }
  pattern.append("\\100[\\100]");
  final RegExpLexer lexer = new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO));
  doTest(pattern.toString(), null, lexer);
}
public void testOctalFollowedByDigit() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.OCTAL_NO_LEADING_ZERO));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(OCTAL_NO_LEADING_ZERO));
doTest("\\39[\\39]", "OCT_CHAR ('\\3')\n" +
"CHARACTER ('9')\n" +
"CLASS_BEGIN ('[')\n" +
@@ -142,7 +269,7 @@ public class RegExpLexerTest extends LexerTestCase {
"CHARACTER ('2')\n" +
"CHARACTER ('3')\n" +
"CLASS_BEGIN ('[')\n" +
"REDUNDANT_ESCAPE ('\\1')\n" +
"INVALID_CHARACTER_ESCAPE_TOKEN ('\\1')\n" +
"CHARACTER ('2')\n" +
"CHARACTER ('3')\n" +
"CLASS_END (']')", lexer);
@@ -150,14 +277,20 @@ public class RegExpLexerTest extends LexerTestCase {
public void testOctalWithLeadingZero2() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.noneOf(RegExpCapability.class));
doTest("\\08\\01\\012\\0123\\0377\\0400", "BAD_OCT_VALUE ('\\0')\n" +
"CHARACTER ('8')\n" +
"OCT_CHAR ('\\01')\n" +
"OCT_CHAR ('\\012')\n" +
"OCT_CHAR ('\\0123')\n" +
"OCT_CHAR ('\\0377')\n" +
"OCT_CHAR ('\\040')\n" +
"CHARACTER ('0')", lexer);
doTest("\\08\\01\\00\\012\\0123\\0377\\0400", "BAD_OCT_VALUE ('\\0')\n" +
"CHARACTER ('8')\n" +
"OCT_CHAR ('\\01')\n" +
"OCT_CHAR ('\\00')\n" +
"OCT_CHAR ('\\012')\n" +
"OCT_CHAR ('\\0123')\n" +
"OCT_CHAR ('\\0377')\n" +
"OCT_CHAR ('\\040')\n" +
"CHARACTER ('0')", lexer);
}
/**
 * Lexes ten single-character groups followed by \105 with no capabilities enabled.
 * No expected token text is inlined (null is passed to doTest).
 * NOTE(review): presumably the base class then reads the expectation from the test data directory - confirm.
 */
public void testBackReference() {
  final String pattern = "(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)\\105";
  doTest(pattern, null, new RegExpLexer(EnumSet.noneOf(RegExpCapability.class)));
}
public void testNoNestedCharacterClasses1() {
@@ -188,7 +321,7 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testNestedCharacterClasses1() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.NESTED_CHARACTER_CLASSES));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(NESTED_CHARACTER_CLASSES));
doTest("[a-z&&[^aeuoi]]", "CLASS_BEGIN ('[')\n" +
"CHARACTER ('a')\n" +
"MINUS ('-')\n" +
@@ -206,7 +339,7 @@ public class RegExpLexerTest extends LexerTestCase {
}
public void testNestedCharacterClasses2() {
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(RegExpCapability.NESTED_CHARACTER_CLASSES));
final RegExpLexer lexer = new RegExpLexer(EnumSet.of(NESTED_CHARACTER_CLASSES));
doTest("[]]", "CLASS_BEGIN ('[')\n" +
"CHARACTER (']')\n" +
"CLASS_END (']')", lexer);
@@ -219,6 +352,6 @@ public class RegExpLexerTest extends LexerTestCase {
@Override
protected String getDirPath() {
return "";
return "community/RegExpSupport/testData/lexer";
}
}

View File

@@ -0,0 +1,32 @@
GROUP_BEGIN ('(')
CHARACTER ('a')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('b')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('c')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('d')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('e')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('f')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('g')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('h')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('i')
GROUP_END (')')
GROUP_BEGIN ('(')
CHARACTER ('j')
GROUP_END (')')
BACKREF ('\10')
CHARACTER ('5')

View File

@@ -0,0 +1,205 @@
OCT_CHAR ('\100')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
GROUP_BEGIN ('(')
GROUP_END (')')
BACKREF ('\100')
CLASS_BEGIN ('[')
OCT_CHAR ('\100')
CLASS_END (']')

View File

@@ -33,8 +33,8 @@ public class PythonRegexpParserDefinition extends RegExpParserDefinition {
public static final IFileElementType PYTHON_REGEXP_FILE = new IFileElementType("PYTHON_REGEXP_FILE", PythonRegexpLanguage.INSTANCE);
protected final EnumSet<RegExpCapability> CAPABILITIES = EnumSet.of(RegExpCapability.DANGLING_METACHARACTERS,
RegExpCapability.OCTAL_NO_LEADING_ZERO,
RegExpCapability.OMIT_NUMBERS_IN_QUANTIFIERS);
RegExpCapability.OMIT_NUMBERS_IN_QUANTIFIERS,
RegExpCapability.MIN_OCTAL_3_DIGITS);
@NotNull
public Lexer createLexer(Project project) {
return new RegExpLexer(CAPABILITIES);