regexp: highlight invalid escapes (such as boundaries) as error inside character class

This commit is contained in:
Bas Leijdekkers
2016-12-22 12:04:19 +01:00
parent d1e2eedb7d
commit f6e35ab7b9
11 changed files with 244 additions and 175 deletions

View File

@@ -132,12 +132,12 @@ class _RegExLexer implements FlexLexer {
/* The ZZ_CMAP_A table has 3056 entries */
static final char ZZ_CMAP_A[] = zzUnpackCMap(
"\10\0\2\63\1\66\1\67\1\70\1\66\22\0\1\14\1\75\1\0\1\76\1\17\1\0\1\72\1\62"+
"\1\2\1\3\1\21\1\22\1\65\1\13\1\1\1\0\1\53\3\55\4\54\2\47\1\71\1\0\1\61\1\74"+
"\1\73\1\20\1\0\2\30\1\37\1\35\1\45\1\46\1\31\1\56\1\40\2\11\1\44\1\64\1\57"+
"\1\11\1\42\1\50\1\33\1\34\1\11\1\43\3\33\1\11\1\32\1\6\1\10\1\7\1\16\1\12"+
"\1\0\1\25\1\26\1\36\1\35\2\25\1\27\1\56\1\40\1\11\1\60\1\43\1\11\1\24\1\11"+
"\1\41\1\11\1\24\1\33\1\24\1\52\2\33\1\51\1\11\1\31\1\4\1\23\1\5\7\0\1\67\24"+
"\10\0\2\60\1\67\1\70\1\71\1\67\22\0\1\14\1\76\1\0\1\77\1\17\1\0\1\73\1\63"+
"\1\2\1\3\1\21\1\22\1\66\1\13\1\1\1\0\1\53\3\55\4\54\2\47\1\72\1\0\1\62\1\75"+
"\1\74\1\20\1\0\2\30\1\37\1\35\1\45\1\46\1\31\1\56\1\40\2\11\1\44\1\65\1\57"+
"\1\11\1\42\1\50\1\64\1\34\1\11\1\43\3\33\1\11\1\32\1\6\1\10\1\7\1\16\1\12"+
"\1\0\1\25\1\26\1\36\1\35\2\25\1\27\1\56\1\40\1\11\1\61\1\43\1\11\1\24\1\11"+
"\1\41\1\11\1\24\1\33\1\24\1\52\2\33\1\51\1\11\1\31\1\4\1\23\1\5\7\0\1\70\24"+
"\0\1\11\12\0\1\11\4\0\1\11\5\0\27\11\1\0\12\11\4\0\14\11\16\0\5\11\7\0\1\11"+
"\1\0\1\11\1\0\5\11\1\0\2\11\2\0\4\11\1\0\1\11\6\0\1\11\1\0\3\11\1\0\1\11\1"+
"\0\4\11\1\0\23\11\1\0\13\11\10\0\6\11\1\0\26\11\2\0\1\11\6\0\10\11\10\0\13"+
@@ -161,7 +161,7 @@ class _RegExLexer implements FlexLexer {
"\11\3\0\11\11\1\0\1\11\5\0\17\11\1\0\16\11\2\0\14\11\13\0\1\11\15\0\7\11\7"+
"\0\16\11\15\0\2\11\12\15\3\0\3\11\11\0\4\11\1\0\4\11\3\0\2\11\11\0\10\11\1"+
"\0\1\11\1\0\1\11\1\0\1\11\1\0\6\11\1\0\7\11\1\0\1\11\3\0\3\11\1\0\7\11\3\0"+
"\4\11\2\0\6\11\14\0\2\67\7\0\1\11\15\0\1\11\2\0\1\11\4\0\1\11\2\0\12\11\1"+
"\4\11\2\0\6\11\14\0\2\70\7\0\1\11\15\0\1\11\2\0\1\11\4\0\1\11\2\0\12\11\1"+
"\0\1\11\3\0\5\11\6\0\1\11\1\0\1\11\1\0\1\11\1\0\4\11\1\0\13\11\2\0\4\11\5"+
"\0\5\11\4\0\1\11\4\0\2\11\13\0\5\11\6\0\4\11\3\0\2\11\14\0\10\11\7\0\10\11"+
"\1\0\7\11\6\0\2\11\12\0\5\11\5\0\2\11\3\0\7\11\6\0\3\11\12\15\2\11\13\0\11"+
@@ -192,21 +192,21 @@ class _RegExLexer implements FlexLexer {
"\1\7\1\10\1\11\1\12\1\13\1\14\1\15\1\16"+
"\1\17\1\20\1\21\1\22\1\23\1\2\1\24\1\25"+
"\1\26\1\27\1\30\1\31\1\32\1\31\1\33\1\34"+
"\1\35\1\12\1\36\1\37\1\2\1\40\1\41\1\24"+
"\1\42\1\43\1\44\1\45\1\46\1\47\1\1\1\26"+
"\1\50\1\51\2\52\1\53\2\0\1\54\1\0\1\55"+
"\1\56\1\57\1\60\1\61\1\62\1\12\1\63\1\64"+
"\1\65\1\66\1\12\1\66\1\67\2\70\1\71\1\72"+
"\1\73\1\74\1\75\1\76\1\77\1\100\1\101\1\12"+
"\1\102\1\103\1\104\1\0\1\105\1\106\1\107\1\110"+
"\1\0\1\111\1\112\1\113\1\114\1\115\1\0\1\116"+
"\1\0\1\117\2\0\1\120\1\121\1\122\1\73\2\75"+
"\1\123\2\124\1\125\1\126\1\127\1\130\1\131\1\132"+
"\1\133\2\0\1\73\2\75\1\134\1\123\2\124\1\135"+
"\1\75\1\123\1\136\1\75\1\137\4\75";
"\1\35\1\12\1\36\1\37\1\2\1\12\1\40\1\41"+
"\1\24\1\42\1\43\1\44\1\45\1\46\1\47\1\1"+
"\1\26\1\50\1\51\2\52\1\53\2\0\1\54\1\0"+
"\1\55\1\56\1\57\1\60\1\61\1\62\1\12\1\63"+
"\1\64\1\65\1\66\1\12\1\66\1\67\2\70\1\71"+
"\1\72\1\73\1\74\1\75\1\76\1\77\1\100\1\101"+
"\1\12\1\102\1\103\1\104\1\0\1\105\1\106\1\107"+
"\1\110\1\0\1\111\1\112\1\113\1\114\1\115\1\0"+
"\1\116\1\0\1\117\2\0\1\120\1\121\1\122\1\73"+
"\2\75\1\123\2\124\1\125\1\126\1\127\1\130\1\131"+
"\1\132\1\133\2\0\1\73\2\75\1\134\1\123\2\124"+
"\1\135\1\75\1\123\1\136\1\75\1\137\4\75";
private static int [] zzUnpackAction() {
int [] result = new int[152];
int [] result = new int[153];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
@@ -231,28 +231,29 @@ class _RegExLexer implements FlexLexer {
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\77\0\176\0\275\0\374\0\u013b\0\u017a\0\u01b9"+
"\0\u01f8\0\u0237\0\u0276\0\u02b5\0\u02f4\0\u0333\0\u0372\0\u03b1"+
"\0\u03f0\0\u042f\0\u042f\0\u046e\0\u042f\0\u04ad\0\u042f\0\u04ec"+
"\0\u042f\0\u052b\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u056a\0\u042f\0\u042f\0\u05a9\0\u05e8"+
"\0\u042f\0\u042f\0\u042f\0\u0627\0\u042f\0\u0666\0\u042f\0\u06a5"+
"\0\u042f\0\u042f\0\u06e4\0\u042f\0\u04ad\0\u04ec\0\u042f\0\u042f"+
"\0\u042f\0\u0723\0\u0762\0\u042f\0\u042f\0\u07a1\0\u042f\0\u042f"+
"\0\u042f\0\u07e0\0\u081f\0\u085e\0\u089d\0\u042f\0\u08dc\0\u091b"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u095a\0\u0999\0\u042f\0\u042f\0\u09d8\0\u042f\0\u042f"+
"\0\u042f\0\u0a17\0\u042f\0\u0a56\0\u0a95\0\u0ad4\0\u042f\0\u042f"+
"\0\u0b13\0\u042f\0\u042f\0\u042f\0\u0b52\0\u042f\0\u042f\0\u042f"+
"\0\u042f\0\u0b91\0\u0bd0\0\u042f\0\u042f\0\u042f\0\u042f\0\u0c0f"+
"\0\u042f\0\u0c4e\0\u042f\0\u0c8d\0\u0ccc\0\u042f\0\u042f\0\u042f"+
"\0\u0d0b\0\u0d4a\0\u0d89\0\u0dc8\0\u0e07\0\u0e46\0\u042f\0\u042f"+
"\0\u042f\0\u042f\0\u042f\0\u042f\0\u042f\0\u0e85\0\u0ec4\0\u042f"+
"\0\u042f\0\u0f03\0\u042f\0\u0f42\0\u0f81\0\u042f\0\u042f\0\u0fc0"+
"\0\u0fff\0\u042f\0\u103e\0\u042f\0\u107d\0\u10bc\0\u10fb\0\u113a";
"\0\0\0\100\0\200\0\300\0\u0100\0\u0140\0\u0180\0\u01c0"+
"\0\u0200\0\u0240\0\u0280\0\u02c0\0\u0300\0\u0340\0\u0380\0\u03c0"+
"\0\u0400\0\u0440\0\u0440\0\u0480\0\u0440\0\u04c0\0\u0440\0\u0500"+
"\0\u0440\0\u0540\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440"+
"\0\u0440\0\u0440\0\u0440\0\u0580\0\u0440\0\u0440\0\u05c0\0\u0600"+
"\0\u0440\0\u0440\0\u0440\0\u0640\0\u0440\0\u0680\0\u0440\0\u06c0"+
"\0\u0440\0\u0440\0\u0700\0\u0740\0\u0440\0\u04c0\0\u0500\0\u0440"+
"\0\u0440\0\u0440\0\u0780\0\u07c0\0\u0440\0\u0440\0\u0800\0\u0440"+
"\0\u0440\0\u0440\0\u0840\0\u0880\0\u08c0\0\u0900\0\u0440\0\u0940"+
"\0\u0980\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440"+
"\0\u0440\0\u0440\0\u09c0\0\u0a00\0\u0440\0\u0440\0\u0a40\0\u0440"+
"\0\u0440\0\u0440\0\u0a80\0\u0440\0\u0ac0\0\u0b00\0\u0b40\0\u0440"+
"\0\u0440\0\u0b80\0\u0440\0\u0440\0\u0440\0\u0bc0\0\u0440\0\u0440"+
"\0\u0440\0\u0440\0\u0c00\0\u0c40\0\u0440\0\u0440\0\u0440\0\u0440"+
"\0\u0c80\0\u0440\0\u0cc0\0\u0440\0\u0d00\0\u0d40\0\u0440\0\u0440"+
"\0\u0440\0\u0d80\0\u0dc0\0\u0e00\0\u0e40\0\u0e80\0\u0ec0\0\u0440"+
"\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440\0\u0440\0\u0f00\0\u0f40"+
"\0\u0440\0\u0440\0\u0f80\0\u0440\0\u0fc0\0\u1000\0\u0440\0\u0440"+
"\0\u1040\0\u1080\0\u0440\0\u10c0\0\u0440\0\u1100\0\u1140\0\u1180"+
"\0\u11c0";
private static int [] zzUnpackRowMap() {
int [] result = new int[152];
int [] result = new int[153];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
@@ -277,84 +278,91 @@ class _RegExLexer implements FlexLexer {
private static final String ZZ_TRANS_PACKED_0 =
"\1\22\1\23\1\24\1\25\1\26\1\27\1\30\1\31"+
"\1\32\3\22\1\33\1\22\1\34\1\35\1\36\1\37"+
"\1\40\1\41\37\22\1\42\2\22\1\42\1\22\1\42"+
"\5\22\1\43\10\22\1\44\66\22\5\45\1\46\3\45"+
"\1\40\1\41\34\22\1\42\6\22\1\42\1\22\1\42"+
"\5\22\1\43\10\22\1\44\67\22\5\45\1\46\3\45"+
"\1\47\4\45\1\34\5\45\23\47\1\45\3\47\3\45"+
"\3\47\3\45\1\47\17\45\1\46\7\45\1\50\31\45"+
"\1\50\3\45\3\50\7\45\1\51\11\45\7\52\1\53"+
"\1\54\55\52\3\0\24\52\1\55\47\52\3\0\6\52"+
"\4\22\1\26\1\22\1\56\1\57\1\60\2\22\1\61"+
"\47\22\1\62\2\22\1\62\1\22\1\62\1\22\1\63"+
"\4\22\4\52\1\26\1\52\1\30\1\52\1\32\5\52"+
"\1\64\47\52\1\42\1\22\1\42\6\52\4\45\1\65"+
"\1\45\1\66\1\45\1\32\21\45\1\67\1\45\1\67"+
"\2\45\1\67\2\45\1\67\1\45\1\67\12\45\1\67"+
"\4\45\1\67\16\45\1\65\1\45\1\66\1\45\1\32"+
"\66\45\3\70\1\71\5\70\1\72\1\70\1\73\10\70"+
"\23\72\1\70\3\72\3\70\3\72\3\70\1\72\4\70"+
"\1\74\5\70\66\14\1\75\10\14\11\70\1\76\12\70"+
"\23\76\1\70\3\76\3\70\3\76\3\70\1\76\6\70"+
"\1\77\14\70\1\76\12\70\23\76\1\70\3\76\3\70"+
"\3\76\1\70\1\100\1\70\1\76\15\70\1\71\5\70"+
"\1\76\12\70\23\76\1\70\3\76\3\70\3\76\3\70"+
"\1\76\15\70\1\71\5\70\1\76\3\70\1\50\6\70"+
"\23\76\1\50\3\76\3\50\3\76\3\70\1\76\12\70"+
"\11\101\1\47\4\101\1\34\5\101\23\47\1\101\3\47"+
"\3\101\3\47\3\101\1\47\4\101\1\102\5\101\117\0"+
"\1\103\73\0\1\104\31\0\1\104\3\0\3\104\7\0"+
"\1\105\20\0\1\106\1\107\5\0\1\110\60\0\1\111"+
"\3\112\1\113\1\114\1\115\1\111\1\115\1\116\1\111"+
"\1\117\1\120\1\111\1\115\5\112\2\121\1\122\1\123"+
"\3\124\3\125\1\126\2\127\2\130\3\131\1\116\1\132"+
"\1\133\1\134\1\135\1\136\2\132\1\137\1\140\1\141"+
"\2\111\1\120\1\116\1\111\1\120\1\111\1\120\6\111"+
"\45\0\1\142\33\0\2\47\5\0\5\47\6\0\35\47"+
"\3\0\1\47\27\0\1\50\31\0\1\50\3\0\3\50"+
"\30\0\1\143\76\0\1\144\1\145\5\0\1\110\52\0"+
"\1\146\5\0\1\111\3\112\1\113\1\114\3\115\1\116"+
"\1\111\1\117\1\120\1\111\1\115\5\112\2\121\1\122"+
"\1\123\3\124\3\125\1\126\2\127\2\130\3\131\1\116"+
"\1\132\1\133\1\134\1\135\1\136\2\132\1\137\1\140"+
"\1\141\2\111\1\120\1\116\1\111\1\120\1\111\1\120"+
"\6\111\72\0\1\147\15\0\1\72\12\0\23\72\1\0"+
"\3\72\3\0\3\72\3\0\1\72\23\0\1\73\12\0"+
"\23\73\1\0\3\73\3\0\3\73\3\0\1\73\23\0"+
"\5\76\6\0\35\76\3\0\1\76\21\0\1\150\71\0"+
"\1\151\37\0\1\152\16\0\1\153\1\154\6\0\1\155"+
"\1\0\1\155\1\156\1\157\1\160\5\0\1\161\7\0"+
"\1\104\31\0\1\104\3\0\3\104\7\0\1\105\26\0"+
"\1\162\31\0\1\162\3\0\3\162\30\0\1\106\76\0"+
"\1\163\1\164\72\0\1\165\153\0\1\166\1\167\14\0"+
"\77\170\47\0\1\171\3\0\3\171\25\0\1\172\20\0"+
"\2\173\1\0\1\173\4\0\3\173\5\0\3\173\3\0"+
"\3\173\46\0\2\174\1\0\1\174\4\0\3\174\5\0"+
"\3\174\3\0\3\174\74\0\1\175\1\176\1\175\102\0"+
"\1\177\1\200\23\0\1\201\150\0\1\202\12\0\1\203"+
"\76\0\1\204\1\205\1\0\3\206\1\0\73\206\5\0"+
"\1\161\7\0\1\162\31\0\1\162\3\0\3\162\30\0"+
"\1\163\116\0\1\207\116\0\1\210\3\0\3\210\26\0"+
"\1\211\17\0\2\212\1\0\1\212\4\0\3\212\5\0"+
"\3\212\3\0\3\212\46\0\2\213\1\0\1\213\4\0"+
"\3\213\5\0\3\213\3\0\3\213\46\0\2\214\1\0"+
"\1\214\4\0\3\214\5\0\3\214\3\0\3\214\74\0"+
"\3\215\74\0\3\216\21\0\3\206\1\217\73\206\5\0"+
"\1\124\76\0\1\213\17\0\2\220\1\0\1\220\4\0"+
"\3\220\5\0\3\220\3\0\3\220\46\0\2\221\1\0"+
"\1\221\4\0\3\221\5\0\3\221\3\0\3\221\74\0"+
"\3\222\26\0\1\213\17\0\2\223\1\0\1\223\4\0"+
"\3\223\5\0\3\223\3\0\3\223\46\0\2\224\1\0"+
"\1\224\4\0\3\224\5\0\3\224\3\0\3\224\26\0"+
"\1\213\17\0\2\225\1\0\1\225\4\0\3\225\5\0"+
"\3\225\3\0\3\225\26\0\1\213\17\0\2\226\1\0"+
"\1\226\4\0\3\226\5\0\3\226\3\0\3\226\26\0"+
"\1\213\17\0\2\227\1\0\1\227\4\0\3\227\5\0"+
"\3\227\3\0\3\227\26\0\1\213\17\0\2\230\1\0"+
"\1\230\4\0\3\230\5\0\3\230\3\0\3\230\26\0"+
"\1\211\17\0\2\230\1\0\1\230\4\0\3\230\5\0"+
"\3\230\3\0\3\230\21\0";
"\2\47\1\45\1\47\2\45\2\47\17\45\1\46\7\45"+
"\1\50\31\45\1\50\3\45\3\50\10\45\1\51\11\45"+
"\7\52\1\53\1\54\56\52\3\0\24\52\1\55\50\52"+
"\3\0\6\52\4\22\1\26\1\22\1\56\1\57\1\60"+
"\2\22\1\61\44\22\1\62\6\22\1\62\1\22\1\62"+
"\1\22\1\63\4\22\4\52\1\26\1\52\1\30\1\52"+
"\1\64\5\52\1\65\50\52\1\42\1\22\1\42\6\52"+
"\4\45\1\66\1\45\1\67\1\45\1\64\21\45\1\70"+
"\1\45\1\70\2\45\1\70\2\45\1\70\1\45\1\70"+
"\12\45\1\70\5\45\1\70\16\45\1\66\1\45\1\67"+
"\1\45\1\64\67\45\3\71\1\72\5\71\1\73\1\71"+
"\1\74\10\71\23\73\1\71\3\73\3\71\2\73\1\71"+
"\1\73\2\71\2\73\4\71\1\75\5\71\67\14\1\76"+
"\10\14\11\71\1\77\12\71\23\77\1\71\3\77\3\71"+
"\2\77\1\71\1\77\2\71\2\77\6\71\1\100\14\71"+
"\1\77\12\71\23\77\1\71\3\77\3\71\2\77\1\71"+
"\1\77\1\71\1\101\2\77\15\71\1\72\5\71\1\77"+
"\12\71\23\77\1\71\3\77\3\71\2\77\1\71\1\77"+
"\2\71\2\77\15\71\1\72\5\71\1\77\3\71\1\50"+
"\6\71\23\77\1\50\3\77\3\50\2\77\1\71\1\77"+
"\2\71\2\77\12\71\11\102\1\47\4\102\1\34\5\102"+
"\23\47\1\102\3\47\3\102\2\47\1\102\1\47\2\102"+
"\2\47\4\102\1\103\5\102\120\0\1\104\74\0\1\105"+
"\31\0\1\105\3\0\3\105\10\0\1\106\20\0\1\107"+
"\1\110\5\0\1\111\61\0\1\112\3\113\1\114\1\115"+
"\1\116\1\112\1\116\1\117\1\112\1\120\1\121\1\112"+
"\1\116\5\113\2\122\1\123\1\124\3\125\3\126\1\127"+
"\2\130\2\131\3\132\1\117\1\133\1\134\1\135\1\136"+
"\1\137\2\133\1\140\1\141\1\121\1\142\2\112\1\126"+
"\1\117\1\112\1\121\1\112\1\121\6\112\45\0\1\143"+
"\34\0\2\47\5\0\5\47\6\0\34\47\1\0\1\47"+
"\2\0\2\47\27\0\1\50\31\0\1\50\3\0\3\50"+
"\31\0\1\144\77\0\1\145\1\146\5\0\1\111\53\0"+
"\1\147\5\0\1\112\3\113\1\114\1\115\3\116\1\117"+
"\1\112\1\120\1\121\1\112\1\116\5\113\3\122\4\117"+
"\3\126\1\127\2\130\2\131\3\132\1\117\1\133\1\134"+
"\1\135\1\136\1\137\2\133\1\140\1\141\1\121\1\117"+
"\2\112\2\117\1\112\1\121\1\112\1\121\6\112\73\0"+
"\1\150\4\0\1\112\3\113\1\114\1\115\1\116\1\112"+
"\1\116\1\117\1\112\1\120\1\121\1\112\1\116\5\113"+
"\2\122\5\117\3\126\1\127\2\130\2\131\3\132\1\117"+
"\1\133\1\134\1\135\1\136\1\137\2\133\1\140\1\141"+
"\1\121\1\117\2\112\2\117\1\112\1\121\1\112\1\121"+
"\6\112\11\0\1\73\12\0\23\73\1\0\3\73\3\0"+
"\2\73\1\0\1\73\2\0\2\73\23\0\1\74\12\0"+
"\23\74\1\0\3\74\3\0\2\74\1\0\1\74\2\0"+
"\2\74\23\0\5\77\6\0\34\77\1\0\1\77\2\0"+
"\2\77\21\0\1\151\72\0\1\152\37\0\1\153\17\0"+
"\1\154\1\155\6\0\1\156\1\0\1\156\1\157\1\160"+
"\1\161\5\0\1\162\7\0\1\105\31\0\1\105\3\0"+
"\3\105\10\0\1\106\26\0\1\163\31\0\1\163\3\0"+
"\3\163\31\0\1\107\77\0\1\164\1\165\73\0\1\166"+
"\155\0\1\167\1\170\14\0\100\171\47\0\1\172\3\0"+
"\3\172\26\0\1\173\20\0\2\174\1\0\1\174\4\0"+
"\3\174\5\0\3\174\3\0\3\174\47\0\2\175\1\0"+
"\1\175\4\0\3\175\5\0\3\175\3\0\3\175\75\0"+
"\1\176\1\177\1\176\104\0\1\200\1\201\23\0\1\202"+
"\152\0\1\203\12\0\1\204\77\0\1\205\1\206\1\0"+
"\3\207\1\0\74\207\5\0\1\162\7\0\1\163\31\0"+
"\1\163\3\0\3\163\31\0\1\164\117\0\1\210\117\0"+
"\1\211\3\0\3\211\27\0\1\212\17\0\2\213\1\0"+
"\1\213\4\0\3\213\5\0\3\213\3\0\3\213\47\0"+
"\2\214\1\0\1\214\4\0\3\214\5\0\3\214\3\0"+
"\3\214\47\0\2\215\1\0\1\215\4\0\3\215\5\0"+
"\3\215\3\0\3\215\75\0\3\216\75\0\3\217\22\0"+
"\3\207\1\220\74\207\5\0\1\125\77\0\1\214\17\0"+
"\2\221\1\0\1\221\4\0\3\221\5\0\3\221\3\0"+
"\3\221\47\0\2\222\1\0\1\222\4\0\3\222\5\0"+
"\3\222\3\0\3\222\75\0\3\223\27\0\1\214\17\0"+
"\2\224\1\0\1\224\4\0\3\224\5\0\3\224\3\0"+
"\3\224\47\0\2\225\1\0\1\225\4\0\3\225\5\0"+
"\3\225\3\0\3\225\27\0\1\214\17\0\2\226\1\0"+
"\1\226\4\0\3\226\5\0\3\226\3\0\3\226\27\0"+
"\1\214\17\0\2\227\1\0\1\227\4\0\3\227\5\0"+
"\3\227\3\0\3\227\27\0\1\214\17\0\2\230\1\0"+
"\1\230\4\0\3\230\5\0\3\230\3\0\3\230\27\0"+
"\1\214\17\0\2\231\1\0\1\231\4\0\3\231\5\0"+
"\3\231\3\0\3\231\27\0\1\212\17\0\2\231\1\0"+
"\1\231\4\0\3\231\5\0\3\231\3\0\3\231\22\0";
private static int [] zzUnpackTrans() {
int [] result = new int[4473];
int [] result = new int[4608];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
@@ -394,7 +402,7 @@ class _RegExLexer implements FlexLexer {
private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\13\0\1\1\5\0\2\11\1\1\1\11\1\1\1\11"+
"\1\1\1\11\1\1\11\11\1\1\2\11\2\1\3\11"+
"\1\1\1\11\1\1\1\11\1\1\2\11\1\1\1\11"+
"\1\1\1\11\1\1\1\11\1\1\2\11\2\1\1\11"+
"\2\1\3\11\2\1\2\11\1\1\3\11\2\1\2\0"+
"\1\11\1\0\1\1\11\11\2\1\2\11\1\1\3\11"+
"\1\1\1\11\3\1\2\11\1\1\3\11\1\0\4\11"+
@@ -403,7 +411,7 @@ class _RegExLexer implements FlexLexer {
"\2\11\2\1\1\11\1\1\1\11\4\1";
private static int [] zzUnpackAttribute() {
int [] result = new int[152];
int [] result = new int[153];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
@@ -996,7 +1004,7 @@ class _RegExLexer implements FlexLexer {
}
case 148: break;
case 54:
{ return yystate() != CLASS2 ? RegExpTT.BOUNDARY : RegExpTT.ESC_CHARACTER;
{ return RegExpTT.BOUNDARY;
}
case 149: break;
case 55:

View File

@@ -368,7 +368,6 @@ public class RegExpParser implements PsiParser {
}
}
else if (type == StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN) {
builder.error("Illegal/unsupported escape sequence");
builder.advanceLexer();
marker.done(RegExpElementTypes.CHAR);
}

View File

@@ -123,7 +123,7 @@ META2= {DOT} | "$" | "?" | "*" | "+" | "|" | {LBRACE} | {LPAREN} | {RPAREN}
CONTROL="t" | "n" | "r" | "f" | "a" | "e"
BOUNDARY="b" | "b{g}"| "B" | "A" | "z" | "Z" | "G"
CLASS="w" | "W" | "s" | "S" | "d" | "D" | "v" | "V" | "X" | "R"
CLASS="w" | "W" | "s" | "S" | "d" | "D" | "v" | "V" | "X"
XML_CLASS="c" | "C" | "i" | "I"
PROP="p" | "P"
TRANSFORMATION= "l" | "L" | "U" | "E"
@@ -152,9 +152,9 @@ HEX_CHAR=[0-9a-fA-F]
{ESCAPE} "u" {HEX_CHAR}{1,3} { return StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN; }
/* octal escapes */
{ESCAPE} "0" [0-7][0-7]? { return RegExpTT.OCT_CHAR; }
{ESCAPE} "0" [0-7]{1,2} { return RegExpTT.OCT_CHAR; }
/* no more than decimal 255 */
{ESCAPE} "0" [0-3][0-7][0-7] { if (allowOctalNoLeadingZero) yypushback(1); return RegExpTT.OCT_CHAR; }
{ESCAPE} "0" [0-3][0-7]{2} { if (allowOctalNoLeadingZero) yypushback(1); return RegExpTT.OCT_CHAR; }
{ESCAPE} "0" { return (allowOctalNoLeadingZero ? RegExpTT.OCT_CHAR : RegExpTT.BAD_OCT_VALUE); }
/* single character after "\c" */
@@ -212,28 +212,30 @@ HEX_CHAR=[0-9a-fA-F]
{ESCAPE} {META1} { return RegExpTT.ESC_CHARACTER; }
{ESCAPE} {META2} { return (yystate() == CLASS2) ? RegExpTT.REDUNDANT_ESCAPE : RegExpTT.ESC_CHARACTER; }
{ESCAPE} {CLASS} { return RegExpTT.CHAR_CLASS; }
{ESCAPE} "R" { return RegExpTT.CHAR_CLASS; }
{ESCAPE} {PROP} { yypushstate(PROP); return RegExpTT.PROPERTY; }
{ESCAPE} {BOUNDARY} { return yystate() != CLASS2 ? RegExpTT.BOUNDARY : RegExpTT.ESC_CHARACTER; }
{ESCAPE} {CONTROL} { return RegExpTT.ESC_CTRL_CHARACTER; }
{ESCAPE} [hH] { return (allowHexDigitClass || allowHorizontalWhitespaceClass ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN); }
{ESCAPE} "N" { yypushstate(NAMED); return RegExpTT.NAMED_CHARACTER; }
{ESCAPE} "k<" { yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_REF; }
{ESCAPE} "k'" { yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_REF; }
{ESCAPE} "g<" { yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_CALL; }
{ESCAPE} "g'" { yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_CALL; }
{ESCAPE} {TRANSFORMATION} { return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
{ESCAPE} [:letter:] { return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
{ESCAPE} [hH] { return (allowHexDigitClass || allowHorizontalWhitespaceClass ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN); }
{ESCAPE} "N" { yypushstate(NAMED); return RegExpTT.NAMED_CHARACTER; }
{ESCAPE} {TRANSFORMATION} { return allowTransformationEscapes ? RegExpTT.CHAR_CLASS : StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
{ESCAPE} [\n\b\t\r\f ] { return commentMode ? RegExpTT.CHARACTER : RegExpTT.REDUNDANT_ESCAPE; }
<CLASS2> {
{ESCAPE} {RBRACKET} { return RegExpTT.ESC_CHARACTER; }
{ESCAPE} {RBRACKET} { return RegExpTT.ESC_CHARACTER; }
{ESCAPE} "b" { return RegExpTT.ESC_CTRL_CHARACTER; } /* = backspace inside character class under python, ruby, javascript */
}
{ESCAPE} {ANY} { return RegExpTT.REDUNDANT_ESCAPE; }
<YYINITIAL> {
{ESCAPE} "k<" { yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_REF; }
{ESCAPE} "k'" { yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_REF; }
{ESCAPE} "g<" { yybegin(NAMED_GROUP); return RegExpTT.RUBY_NAMED_GROUP_CALL; }
{ESCAPE} "g'" { yybegin(QUOTED_NAMED_GROUP); return RegExpTT.RUBY_QUOTED_NAMED_GROUP_CALL; }
{ESCAPE} "R" { return RegExpTT.CHAR_CLASS; }
{ESCAPE} {BOUNDARY} { return RegExpTT.BOUNDARY; }
}
{ESCAPE} [:letter:] { return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }
{ESCAPE} {ANY} { return RegExpTT.REDUNDANT_ESCAPE; }
{ESCAPE} { return StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN; }

View File

@@ -24,6 +24,8 @@ import com.intellij.lang.annotation.Annotator;
import com.intellij.openapi.util.TextRange;
import com.intellij.psi.PsiComment;
import com.intellij.psi.PsiElement;
import com.intellij.psi.StringEscapesTokenTypes;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.util.PsiTreeUtil;
import com.intellij.util.containers.ContainerUtil;
import org.intellij.lang.regexp.RegExpLanguageHosts;
@@ -181,40 +183,44 @@ public final class RegExpAnnotator extends RegExpElementVisitor implements Annot
@Override
public void visitRegExpChar(final RegExpChar ch) {
final Character value = ch.getValue();
if (value == null || (value == '\b' && !myLanguageHosts.supportsLiteralBackspace(ch))) {
switch (ch.getType()) {
case CHAR:
myHolder.createErrorAnnotation(ch, "Illegal/unsupported escape sequence");
break;
case HEX:
myHolder.createErrorAnnotation(ch, "Illegal hexadecimal escape sequence");
break;
case OCT:
myHolder.createErrorAnnotation(ch, "Illegal octal escape sequence");
break;
case UNICODE:
myHolder.createErrorAnnotation(ch, "Illegal unicode escape sequence");
break;
case INVALID:
// produces a parser error. already handled by IDEA and possibly suppressed by IntelliLang
break;
final PsiElement child = ch.getFirstChild();
IElementType type = child.getNode().getElementType();
if (type == StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN) {
myHolder.createErrorAnnotation(ch, "Illegal/unsupported escape sequence");
return;
}
else if (type == RegExpTT.BAD_HEX_VALUE) {
myHolder.createErrorAnnotation(ch, "Illegal hexadecimal escape sequence");
return;
}
else if (type == RegExpTT.BAD_OCT_VALUE) {
myHolder.createErrorAnnotation(ch, "Illegal octal escape sequence");
return;
}
else if (type == StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN) {
myHolder.createErrorAnnotation(ch, "Illegal unicode escape sequence");
return;
}
final String text = ch.getUnescapedText();
if (type == RegExpTT.ESC_CTRL_CHARACTER && text.equals("\\b") && !myLanguageHosts.supportsLiteralBackspace(ch)) {
myHolder.createErrorAnnotation(ch, "Illegal/unsupported escape sequence");
}
if (text.startsWith("\\") && myLanguageHosts.isRedundantEscape(ch, text)) {
final ASTNode astNode = ch.getNode().getFirstChildNode();
if (astNode != null && astNode.getElementType() == RegExpTT.REDUNDANT_ESCAPE) {
final Annotation a = myHolder.createWeakWarningAnnotation(ch, "Redundant character escape");
registerFix(a, new RemoveRedundantEscapeAction(ch));
}
}
else {
final String text = ch.getUnescapedText();
if (text.startsWith("\\") && myLanguageHosts.isRedundantEscape(ch, text)) {
final ASTNode astNode = ch.getNode().getFirstChildNode();
if (astNode != null && astNode.getElementType() == RegExpTT.REDUNDANT_ESCAPE) {
final Annotation a = myHolder.createWeakWarningAnnotation(ch, "Redundant character escape");
registerFix(a, new RemoveRedundantEscapeAction(ch));
}
final RegExpChar.Type charType = ch.getType();
if (charType == RegExpChar.Type.HEX || charType == RegExpChar.Type.UNICODE) {
if (ch.getValue() == null) {
myHolder.createErrorAnnotation(ch, "Illegal unicode escape sequence");
return;
}
if (ch.getType() == RegExpChar.Type.HEX) {
if (text.charAt(text.length() - 1) == '}') {
if (!myLanguageHosts.supportsExtendedHexCharacter(ch)) {
myHolder.createErrorAnnotation(ch, "This hex character syntax is not supported");
}
if (text.charAt(text.length() - 1) == '}') {
if (!myLanguageHosts.supportsExtendedHexCharacter(ch)) {
myHolder.createErrorAnnotation(ch, "This hex character syntax is not supported");
}
}
}

View File

@@ -345,6 +345,52 @@ public class RegExpLexerTest extends LexerTestCase {
"CLASS_END (']')", lexer);
}
/**
 * Lexes each boundary escape twice: first at the top level of the pattern and then
 * inside a character class, using a lexer created without any {@link RegExpCapability}.
 * Expected token dump: at the top level every escape is a {@code BOUNDARY}; inside the
 * class {@code \b} is an {@code ESC_CTRL_CHARACTER} (so {@code \b{g}} splits into
 * {@code \b} plus three plain characters) and the remaining boundary escapes are
 * {@code INVALID_CHARACTER_ESCAPE_TOKEN}s.
 */
public void testBoundaries() {
  final String pattern = "\\b\\b{g}\\B\\A\\z\\Z\\G[\\b\\b{g}\\B\\A\\z\\Z\\G]";

  // Tokens expected for the part of the pattern in front of the character class.
  final String outsideClass =
    "BOUNDARY ('\\b')\n" +
    "BOUNDARY ('\\b{g}')\n" +
    "BOUNDARY ('\\B')\n" +
    "BOUNDARY ('\\A')\n" +
    "BOUNDARY ('\\z')\n" +
    "BOUNDARY ('\\Z')\n" +
    "BOUNDARY ('\\G')\n";

  // Tokens expected for the same escapes once they appear between '[' and ']'.
  final String insideClass =
    "CLASS_BEGIN ('[')\n" +
    "ESC_CTRL_CHARACTER ('\\b')\n" +
    "ESC_CTRL_CHARACTER ('\\b')\n" +
    "CHARACTER ('{')\n" +
    "CHARACTER ('g')\n" +
    "CHARACTER ('}')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\B')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\A')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\z')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\Z')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\G')\n" +
    "CLASS_END (']')";

  final RegExpLexer plainLexer = new RegExpLexer(EnumSet.noneOf(RegExpCapability.class));
  doTest(pattern, outsideClass + insideClass, plainLexer);
}
/**
 * Lexes a character class containing {@code \k<a>}, a literal {@code (?<t>t)},
 * {@code \g'q'} and {@code \R}, using a lexer created without any {@link RegExpCapability}.
 * Expected token dump: {@code \k}, {@code \g} and {@code \R} are each an
 * {@code INVALID_CHARACTER_ESCAPE_TOKEN}, and everything else between the brackets
 * is an ordinary {@code CHARACTER}.
 */
public void testEscapesInsideCharClass() {
  final String pattern = "[\\k<a> (?<t>t)\\g'q'\\R]";

  final String expectedTokens =
    "CLASS_BEGIN ('[')\n" +
    // \k<a> : the escape itself is invalid, the name part is plain text
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\k')\n" +
    "CHARACTER ('<')\n" +
    "CHARACTER ('a')\n" +
    "CHARACTER ('>')\n" +
    "CHARACTER (' ')\n" +
    // (?<t>t) : group syntax is lexed character by character
    "CHARACTER ('(')\n" +
    "CHARACTER ('?')\n" +
    "CHARACTER ('<')\n" +
    "CHARACTER ('t')\n" +
    "CHARACTER ('>')\n" +
    "CHARACTER ('t')\n" +
    "CHARACTER (')')\n" +
    // \g'q' : the escape itself is invalid, the quoted name is plain text
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\g')\n" +
    "CHARACTER (''')\n" +
    "CHARACTER ('q')\n" +
    "CHARACTER (''')\n" +
    "INVALID_CHARACTER_ESCAPE_TOKEN ('\\R')\n" +
    "CLASS_END (']')";

  final RegExpLexer plainLexer = new RegExpLexer(EnumSet.noneOf(RegExpCapability.class));
  doTest(pattern, expectedTokens, plainLexer);
}
@Override
protected Lexer createLexer() {
return null;

View File

@@ -121,6 +121,22 @@
<pattern><![CDATA[<weak_warning descr="Redundant character escape">\]</weak_warning><weak_warning descr="Redundant character escape">\}</weak_warning>]]></pattern>
<expected>OK</expected>
</test>
<test verify="false">
<pattern><![CDATA[<error descr="Illegal/unsupported escape sequence">\q</error>]]></pattern>
<expected>OK</expected>
</test>
<test verify="false">
<pattern>a<![CDATA[<error descr="Illegal/unsupported escape sequence">\</error>]]></pattern>
<expected>OK</expected>
</test>
<test verify="false">
<pattern><![CDATA[<error descr="Illegal/unsupported escape sequence">\E</error>]]]]></pattern>
<expected>OK</expected>
</test>
<test>
<pattern>\x08</pattern><!-- backspace character -->
<expected>OK</expected>
</test>
</escapes>
<namedchars>

View File

@@ -4,5 +4,5 @@ REGEXP_FILE
RegExpClassImpl: <[\b]>
PsiElement(CLASS_BEGIN)('[')
RegExpCharImpl: <\b>
PsiElement(ESC_CHARACTER)('\b')
PsiElement(ESC_CTRL_CHARACTER)('\b')
PsiElement(CLASS_END)(']')

View File

@@ -2,6 +2,4 @@ REGEXP_FILE
RegExpPatternImpl: <\q>
RegExpBranchImpl: <\q>
RegExpCharImpl: <\q>
PsiErrorElement:Illegal/unsupported escape sequence
<empty list>
PsiElement(INVALID_CHARACTER_ESCAPE_TOKEN)('\q')

View File

@@ -4,6 +4,4 @@ REGEXP_FILE
RegExpCharImpl: <a>
PsiElement(CHARACTER)('a')
RegExpCharImpl: <\>
PsiErrorElement:Illegal/unsupported escape sequence
<empty list>
PsiElement(INVALID_CHARACTER_ESCAPE_TOKEN)('\')

View File

@@ -8,6 +8,4 @@ REGEXP_FILE
RegExpCharImpl: <c>
PsiElement(CHARACTER)('c')
RegExpCharImpl: <\>
PsiErrorElement:Illegal/unsupported escape sequence
<empty list>
PsiElement(INVALID_CHARACTER_ESCAPE_TOKEN)('\')

View File

@@ -2,6 +2,4 @@ REGEXP_FILE
RegExpPatternImpl: <\E>
RegExpBranchImpl: <\E>
RegExpCharImpl: <\E>
PsiErrorElement:Illegal/unsupported escape sequence
<empty list>
PsiElement(INVALID_CHARACTER_ESCAPE_TOKEN)('\E')