WI-48209 Support PCRE regex branch reset /(?|)/

https://www.regular-expressions.info/branchreset.html

GitOrigin-RevId: 6b10c7c137c4758a20ffb6c09e1172ffc0be4613
This commit is contained in:
Kirill Smelov
2020-03-24 17:06:41 +03:00
committed by intellij-monorepo-bot
parent 0ddf56cf42
commit ba3312c584
7 changed files with 37 additions and 27 deletions

View File

@@ -213,7 +213,7 @@ class _RegExLexer implements FlexLexer {
"\1\12\1\100\1\101\1\102\1\103\1\104\1\105\1\106"+
"\1\12\1\107\1\110\1\111\1\112\1\113\1\114\1\115"+
"\1\116\1\117\1\120\1\121\1\122\1\12\1\123\1\124"+
"\1\0\1\125\2\26\1\126\2\61\1\127\1\0\1\130"+
"\1\0\1\125\2\26\1\126\2\61\1\127\1\130\1\0"+
"\1\131\1\132\1\133\1\134\1\0\1\135\1\136\1\137"+
"\1\140\2\0\1\141\6\0\1\142\1\143\1\144\1\145"+
"\2\146\1\101\1\147\1\150\1\151\1\152\1\153\1\154"+
@@ -265,7 +265,7 @@ class _RegExLexer implements FlexLexer {
"\0\u0e85\0\u052b\0\u052b\0\u052b\0\u052b\0\u052b\0\u052b\0\u052b"+
"\0\u052b\0\u052b\0\u052b\0\u052b\0\u052b\0\u0ec4\0\u052b\0\u052b"+
"\0\u0f03\0\u052b\0\u0f42\0\u052b\0\u052b\0\u052b\0\u0f81\0\u052b"+
"\0\u0fc0\0\u0fff\0\u052b\0\u052b\0\u052b\0\u052b\0\u103e\0\u052b"+
"\0\u052b\0\u0fc0\0\u0fff\0\u052b\0\u052b\0\u052b\0\u103e\0\u052b"+
"\0\u052b\0\u052b\0\u052b\0\u107d\0\u10bc\0\u052b\0\u10fb\0\u113a"+
"\0\u1179\0\u11b8\0\u11f7\0\u1236\0\u1275\0\u052b\0\u052b\0\u052b"+
"\0\u12b4\0\u12f3\0\u1332\0\u052b\0\u1371\0\u052b\0\u13b0\0\u052b"+
@@ -366,24 +366,24 @@ class _RegExLexer implements FlexLexer {
"\32\53\1\0\2\53\1\0\1\53\2\0\2\53\1\201"+
"\3\0\1\53\12\0\1\205\57\0\1\201\14\0\1\206"+
"\17\0\2\207\1\0\1\207\4\0\3\207\5\0\4\207"+
"\3\0\2\207\23\0\1\210\37\0\1\211\20\0\1\212"+
"\1\213\2\0\1\214\1\215\1\216\1\217\1\0\1\220"+
"\1\221\6\0\1\222\76\0\1\223\2\0\1\224\4\0"+
"\1\127\31\0\2\127\3\0\2\127\1\225\25\0\1\226"+
"\2\0\1\227\4\0\1\230\31\0\2\230\3\0\2\230"+
"\36\0\1\231\31\0\2\231\3\0\2\231\1\232\24\0"+
"\1\233\76\0\1\234\43\0\1\235\3\0\2\235\5\0"+
"\1\236\1\237\12\0\77\240\47\0\1\241\4\0\1\242"+
"\1\241\70\0\2\243\3\0\2\243\25\0\1\244\20\0"+
"\2\245\1\0\1\245\4\0\3\245\5\0\4\245\3\0"+
"\2\245\25\0\1\246\20\0\2\247\1\0\1\247\4\0"+
"\3\247\5\0\4\247\3\0\2\247\104\0\1\250\1\251"+
"\16\0\1\234\43\0\1\235\3\0\2\235\30\0\1\252"+
"\100\0\1\203\1\0\1\203\1\0\1\204\6\0\23\203"+
"\2\204\3\203\2\204\1\0\2\203\1\0\1\203\2\0"+
"\2\203\4\0\1\203\10\0\1\253\17\0\2\207\1\0"+
"\1\207\4\0\3\207\5\0\4\207\3\0\2\207\104\0"+
"\1\254\3\0\1\255\4\0\1\216\71\0\1\256\5\0"+
"\3\0\2\207\23\0\1\210\17\0\1\211\17\0\1\212"+
"\20\0\1\213\1\214\2\0\1\215\1\216\1\0\1\217"+
"\1\0\1\220\1\221\6\0\1\222\76\0\1\223\2\0"+
"\1\224\4\0\1\127\31\0\2\127\3\0\2\127\1\225"+
"\25\0\1\226\2\0\1\227\4\0\1\230\31\0\2\230"+
"\3\0\2\230\36\0\1\231\31\0\2\231\3\0\2\231"+
"\1\232\24\0\1\233\76\0\1\234\43\0\1\235\3\0"+
"\2\235\5\0\1\236\1\237\12\0\77\240\47\0\1\241"+
"\4\0\1\242\1\241\70\0\2\243\3\0\2\243\25\0"+
"\1\244\20\0\2\245\1\0\1\245\4\0\3\245\5\0"+
"\4\245\3\0\2\245\25\0\1\246\20\0\2\247\1\0"+
"\1\247\4\0\3\247\5\0\4\247\3\0\2\247\104\0"+
"\1\250\1\251\16\0\1\234\43\0\1\235\3\0\2\235"+
"\30\0\1\252\100\0\1\203\1\0\1\203\1\0\1\204"+
"\6\0\23\203\2\204\3\203\2\204\1\0\2\203\1\0"+
"\1\203\2\0\2\203\4\0\1\203\10\0\1\253\17\0"+
"\2\207\1\0\1\207\4\0\3\207\5\0\4\207\3\0"+
"\2\207\104\0\1\254\3\0\1\255\76\0\1\256\5\0"+
"\1\257\1\0\3\260\1\0\73\260\5\0\1\261\76\0"+
"\1\223\2\0\1\224\4\0\1\225\31\0\2\225\3\0"+
"\2\225\26\0\1\262\76\0\1\263\2\0\1\264\4\0"+
@@ -452,7 +452,7 @@ class _RegExLexer implements FlexLexer {
"\1\1\3\11\2\1\1\11\1\1\4\11\11\1\3\0"+
"\3\11\1\1\4\11\2\1\2\11\1\1\4\11\2\1"+
"\1\11\2\1\2\11\1\1\14\11\1\1\2\11\1\0"+
"\1\11\1\1\3\11\1\1\1\11\1\0\1\1\4\11"+
"\1\11\1\1\3\11\1\1\2\11\1\0\1\1\3\11"+
"\1\0\4\11\2\0\1\11\6\0\1\1\3\11\3\1"+
"\1\11\1\1\1\11\1\1\10\11\1\0\3\11\1\0"+
"\1\11\1\0\1\11\4\0\2\1\3\11\1\1\4\11"+
@@ -1345,27 +1345,27 @@ class _RegExLexer implements FlexLexer {
// fall through
case 212: break;
case 88:
{ yybegin(NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_NAMED_GROUP;
{ return RegExpTT.PCRE_BRANCH_RESET;
}
// fall through
case 213: break;
case 89:
{ yybegin(QUOTED_NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_QUOTED_NAMED_GROUP;
{ yybegin(NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_NAMED_GROUP;
}
// fall through
case 214: break;
case 90:
{ return RegExpTT.POS_LOOKAHEAD;
{ yybegin(QUOTED_NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_QUOTED_NAMED_GROUP;
}
// fall through
case 215: break;
case 91:
{ return RegExpTT.NON_CAPT_GROUP;
{ return RegExpTT.POS_LOOKAHEAD;
}
// fall through
case 216: break;
case 92:
{ yybegin(NAMED_GROUP); return RegExpTT.PCRE_RECURSIVE_NAMED_GROUP_REF;
{ return RegExpTT.NON_CAPT_GROUP;
}
// fall through
case 217: break;

View File

@@ -34,6 +34,7 @@ public class RegExpBraceMatcher implements PairedBraceMatcher {
new BracePair(RegExpTT.PYTHON_NAMED_GROUP, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PYTHON_NAMED_GROUP_REF, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PCRE_COND_REF, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PCRE_BRANCH_RESET, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.RUBY_NAMED_GROUP, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.RUBY_QUOTED_NAMED_GROUP, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.RUBY_NAMED_GROUP_REF, RegExpTT.GT, true),

View File

@@ -109,6 +109,7 @@ public class RegExpHighlighter extends SyntaxHighlighterBase {
ourMap.put(RegExpTT.POS_LOOKBEHIND, PARENTHS);
ourMap.put(RegExpTT.NEG_LOOKBEHIND, PARENTHS);
ourMap.put(RegExpTT.POS_LOOKAHEAD, PARENTHS);
ourMap.put(RegExpTT.PCRE_BRANCH_RESET, PARENTHS);
ourMap.put(RegExpTT.NEG_LOOKAHEAD, PARENTHS);
ourMap.put(RegExpTT.SET_OPTIONS, PARENTHS);
ourMap.put(RegExpTT.PYTHON_NAMED_GROUP, PARENTHS);

View File

@@ -147,6 +147,8 @@ public interface RegExpTT {
IElementType PYTHON_COND_REF = new RegExpElementType("PYTHON_COND_REF");
/** (?(condition pattern)yes-pattern|no-pattern) */
IElementType PCRE_COND_REF = new RegExpElementType("PCRE_COND_REF");
/** (?|pattern) - PCRE branch reset group */
IElementType PCRE_BRANCH_RESET = new RegExpElementType("PCRE_BRANCH_RESET");
/** (?<name>... */
IElementType RUBY_NAMED_GROUP = new RegExpElementType("RUBY_NAMED_GROUP");
/** \k<name> */
@@ -176,7 +178,7 @@ public interface RegExpTT {
TokenSet QUANTIFIERS = TokenSet.create(QUEST, PLUS, STAR, LBRACE);
TokenSet GROUPS = TokenSet.create(GROUP_BEGIN, NON_CAPT_GROUP, ATOMIC_GROUP, POS_LOOKAHEAD, NEG_LOOKAHEAD, POS_LOOKBEHIND, NEG_LOOKBEHIND);
TokenSet GROUPS = TokenSet.create(GROUP_BEGIN, NON_CAPT_GROUP, ATOMIC_GROUP, POS_LOOKAHEAD, NEG_LOOKAHEAD, POS_LOOKBEHIND, NEG_LOOKBEHIND, PCRE_BRANCH_RESET);
TokenSet BOUNDARIES = TokenSet.create(BOUNDARY, CARET, DOLLAR);
}

View File

@@ -71,6 +71,8 @@ public interface RegExpGroup extends RegExpAtom, PsiNamedElement {
/** (?<!pattern) */
NEGATIVE_LOOKBEHIND,
/** (?|pattern) */
PCRE_BRANCH_RESET,
/** (?i:pattern) */
OPTIONS

View File

@@ -106,6 +106,9 @@ public class RegExpGroupImpl extends RegExpElementImpl implements RegExpGroup {
else if (elementType == RegExpTT.NEG_LOOKBEHIND) {
return Type.NEGATIVE_LOOKBEHIND;
}
else if (elementType == RegExpTT.PCRE_BRANCH_RESET) {
return Type.PCRE_BRANCH_RESET;
}
throw new AssertionError();
}

View File

@@ -429,6 +429,7 @@ BACK_REFERENCES_GROUP = [1-9][0-9]{0,2}
"(?P<" { yybegin(NAMED_GROUP); capturingGroupCount++; return RegExpTT.PYTHON_NAMED_GROUP; }
"(?P=" { yybegin(PY_NAMED_GROUP_REF); return RegExpTT.PYTHON_NAMED_GROUP_REF; }
"(?(" { yybegin(PY_COND_REF); return RegExpTT.PYTHON_COND_REF; }
"(?|" { return RegExpTT.PCRE_BRANCH_RESET; }
"(?<" { yybegin(NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_NAMED_GROUP; }
"(?'" { yybegin(QUOTED_NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_QUOTED_NAMED_GROUP; }