WI-60369 Support an ability to call a PCRE numbered group

GitOrigin-RevId: b58439eda68ea2df047c54330a0c6fe3bbc7946b
This commit is contained in:
Filippova Maria
2022-07-21 16:48:30 +02:00
committed by intellij-monorepo-bot
parent 7144d5063c
commit 2a0ffa17ff
14 changed files with 555 additions and 446 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -62,6 +62,7 @@ error.unknown.property.name=Unknown property name
error.unknown.property.value=Unknown property value
error.unresolved.back.reference=Unresolved back reference
error.unresolved.named.group.reference=Unresolved named group reference
error.unresolved.numbered.group.reference=Unresolved numbered group reference
filetype.regular.expression.description=Regular expression
inspection.group.name.regexp=RegExp
inspection.name.anonymous.group.or.numeric.back.reference=Anonymous capturing group or numeric back reference
@@ -123,6 +124,7 @@ parse.error.closing.brace.or.number.expected='}' or number expected
parse.error.comma.expected=',' expected
parse.error.empty.property=Empty property
parse.error.group.name.expected=Group name expected
parse.error.group.number.expected=Group number expected
parse.error.group.name.or.number.expected=Group name or number expected
parse.error.illegal.category.shorthand=Illegal category shorthand
parse.error.illegal.character.range=Illegal character range

View File

@@ -21,6 +21,7 @@ public class RegExpBraceMatcher implements PairedBraceMatcher {
new BracePair(RegExpTT.PYTHON_NAMED_GROUP_REF, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PCRE_RECURSIVE_NAMED_GROUP_REF, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.CONDITIONAL, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PCRE_NUMBERED_GROUP_REF, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.PCRE_BRANCH_RESET, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.RUBY_NAMED_GROUP, RegExpTT.GROUP_END, true),
new BracePair(RegExpTT.RUBY_QUOTED_NAMED_GROUP, RegExpTT.GROUP_END, true),

View File

@@ -118,6 +118,11 @@ public enum RegExpCapability {
*/
PCRE_BACK_REFERENCES,
/**
* Allow PCRE calls of a capturing group by number: (?n), (?+n) or (?-n)
*/
PCRE_NUMBERED_GROUP_REF,
/**
* Allow PCRE conditions DEFINE and VERSION[>]?=n.m in conditional groups
*/

View File

@@ -117,6 +117,7 @@ public class RegExpHighlighter extends SyntaxHighlighterBase {
ourMap.put(RegExpTT.PYTHON_NAMED_GROUP_REF, PARENTHS);
ourMap.put(RegExpTT.PCRE_RECURSIVE_NAMED_GROUP_REF, PARENTHS);
ourMap.put(RegExpTT.CONDITIONAL, PARENTHS);
ourMap.put(RegExpTT.PCRE_NUMBERED_GROUP_REF, PARENTHS);
ourMap.put(RegExpTT.RUBY_NAMED_GROUP, PARENTHS);
ourMap.put(RegExpTT.RUBY_QUOTED_NAMED_GROUP, PARENTHS);
ourMap.put(RegExpTT.GROUP_BEGIN, PARENTHS);

View File

@@ -408,6 +408,9 @@ public class RegExpParser implements PsiParser, LightPsiParser {
else if (type == RegExpTT.PYTHON_NAMED_GROUP_REF || type == RegExpTT.PCRE_RECURSIVE_NAMED_GROUP_REF) {
parseNamedGroupRef(builder, marker, RegExpTT.GROUP_END);
}
else if (type == RegExpTT.PCRE_NUMBERED_GROUP_REF) {
parseNumberedGroupRef(builder, marker);
}
else if (type == RegExpTT.RUBY_NAMED_GROUP_REF || type == RegExpTT.RUBY_NAMED_GROUP_CALL) {
parseNamedGroupRef(builder, marker, RegExpTT.GT);
}
@@ -522,6 +525,13 @@ public class RegExpParser implements PsiParser, LightPsiParser {
marker.done(RegExpElementTypes.NAMED_GROUP_REF);
}
/**
 * Parses a PCRE numbered group reference such as {@code (?1)}.
 * <p>
 * Called when the current token is {@link RegExpTT#PCRE_NUMBERED_GROUP_REF}. After that opener a
 * {@link RegExpTT#NUMBER} and a {@link RegExpTT#GROUP_END} are expected; each expectation is
 * checked through {@code checkMatches} with its own error message. The finished element reuses the
 * {@link RegExpElementTypes#NAMED_GROUP_REF} element type.
 *
 * @param builder the PSI builder positioned on the opener token
 * @param marker  the marker opened by the caller for this element; it is completed here
 */
private static void parseNumberedGroupRef(PsiBuilder builder, PsiBuilder.Marker marker) {
  // step over the opener token
  builder.advanceLexer();

  final String numberExpected = RegExpBundle.message("parse.error.group.number.expected");
  checkMatches(builder, RegExpTT.NUMBER, numberExpected);

  final String unclosedReference = RegExpBundle.message("parse.error.unclosed.group.reference");
  checkMatches(builder, RegExpTT.GROUP_END, unclosedReference);

  // numbered references share the element type of named group references
  marker.done(RegExpElementTypes.NAMED_GROUP_REF);
}
private static boolean isLetter(CharSequence text) {
if (text == null) return false;
assert text.length() == 1;

View File

@@ -143,6 +143,8 @@ public interface RegExpTT {
IElementType PYTHON_NAMED_GROUP = new RegExpElementType("PYTHON_NAMED_GROUP");
/** (?P>name) or (?&name) */
IElementType PCRE_RECURSIVE_NAMED_GROUP_REF = new RegExpElementType("PCRE_RECURSIVE_NAMED_GROUP");
/** (?n), (?+n) or (?-n) */
IElementType PCRE_NUMBERED_GROUP_REF = new RegExpElementType("PCRE_NUMBERED_GROUP_REF");
/** (?P=name) */
IElementType PYTHON_NAMED_GROUP_REF = new RegExpElementType("PYTHON_NAMED_GROUP_REF");
/** (?(id/name/lookaround)yes-pattern|no-pattern) */

View File

@@ -74,7 +74,8 @@ final class RegExpEquivalenceChecker {
/**
 * Tells whether two group references denote the same group.
 * <p>
 * When the first reference has a group name, the names of both references are compared. When it
 * has none (presumably the case for PCRE numbered references such as {@code (?1)} - confirm
 * against {@code getGroupName()}), the source text of the two references is compared instead.
 *
 * @param namedGroupRef1 the first reference
 * @param namedGroupRef2 the second reference
 * @return {@code true} if the references are considered equivalent
 */
private static boolean areNamedGroupRefsEquivalent(RegExpNamedGroupRef namedGroupRef1, RegExpNamedGroupRef namedGroupRef2) {
  final String name = namedGroupRef1.getGroupName();
  if (name != null) {
    return name.equals(namedGroupRef2.getGroupName());
  }
  // no name available: fall back to the literal text of the references
  return namedGroupRef1.getText().equals(namedGroupRef2.getText());
}
private static boolean areIntersectionsEquivalent(RegExpIntersection intersection1, RegExpIntersection intersection2) {

View File

@@ -12,5 +12,6 @@ public interface RegExpNamedGroupRef extends RegExpAtom {
boolean isPythonNamedGroupRef();
boolean isRubyNamedGroupRef();
boolean isPcreNumberedGroupRef();
boolean isNamedGroupRef();
}

View File

@@ -9,6 +9,7 @@ import com.intellij.psi.PsiReference;
import com.intellij.psi.SyntaxTraverser;
import com.intellij.psi.tree.TokenSet;
import com.intellij.util.IncorrectOperationException;
import java.util.List;
import java.util.Objects;
import org.intellij.lang.regexp.RegExpTT;
import org.intellij.lang.regexp.psi.RegExpElementVisitor;
@@ -35,6 +36,18 @@ public class RegExpNamedGroupRefImpl extends RegExpElementImpl implements RegExp
/**
 * Resolves this reference to the capturing group it denotes.
 * <p>
 * Named references are looked up by group name. PCRE numbered references are looked up by number:
 * <ul>
 *   <li>{@code (?n)} - the n-th capturing group of the pattern;</li>
 *   <li>{@code (?-n)} - the n-th capturing group opened before this reference, counting backwards;</li>
 *   <li>{@code (?+n)} - the n-th capturing group opened after this reference.</li>
 * </ul>
 * The sign is part of the opener token, not of the {@code NUMBER} token, so it is read from the
 * opener.
 *
 * @return the referenced group, or {@code null} if the name or number is missing, malformed or
 *         does not correspond to a capturing group (an absolute number below 1 never resolves)
 */
@Override
@Nullable
public RegExpGroup resolve() {
  if (!isPcreNumberedGroupRef()) {
    final String groupName = getGroupName();
    return groupName == null ? null : resolve(groupName, getContainingFile());
  }

  final ASTNode numberNode = getNode().findChildByType(RegExpTT.NUMBER);
  if (numberNode == null) return null;
  final int number;
  try {
    number = Integer.parseInt(numberNode.getText());
  }
  catch (NumberFormatException ignored) {
    // not representable as an int (e.g. too many digits): no such group can exist
    return null;
  }

  final ASTNode opener = getNode().findChildByType(RegExpTT.PCRE_NUMBERED_GROUP_REF);
  final String openerText = opener == null ? "" : opener.getText();
  final boolean forward = openerText.endsWith("+");
  final boolean backward = openerText.endsWith("-");
  if (!forward && !backward) {
    return resolveNumberedGroupRef(number, getContainingFile());
  }

  // a relative reference needs a non-zero distance
  if (number == 0) return null;
  final int openedBefore = countCapturingGroupsOpenedBefore(getTextRange().getStartOffset());
  // (?-1) is the group opened most recently before the reference, (?+1) the next one to be opened
  final long target = forward ? (long)openedBefore + number : (long)openedBefore - number + 1;
  if (target < 1 || target > Integer.MAX_VALUE) return null;
  return resolveNumberedGroupRef((int)target, getContainingFile());
}

/** Counts the capturing groups of the containing file whose opening parenthesis lies before {@code offset}. */
private int countCapturingGroupsOpenedBefore(int offset) {
  int count = 0;
  for (RegExpGroup group : SyntaxTraverser.psiTraverser(getContainingFile())
    .filter(RegExpGroup.class)
    .filter(RegExpGroup::isCapturing)) {
    if (group.getTextRange().getStartOffset() < offset) {
      count++;
    }
  }
  return count;
}
@@ -46,6 +59,19 @@ public class RegExpNamedGroupRefImpl extends RegExpElementImpl implements RegExp
.first();
}
/**
 * Finds a capturing group of {@code file} by its 1-based position among the capturing groups, in
 * the order the PSI traverser yields them.
 *
 * @param groupNumber 1-based number of the wanted capturing group
 * @param file        the file to search
 * @return the group at that position, or {@code null} if {@code groupNumber} is below 1 or the
 *         file has fewer capturing groups
 */
@Nullable
static RegExpGroup resolveNumberedGroupRef(int groupNumber, PsiFile file) {
  if (groupNumber < 1) {
    return null;
  }
  int position = 0;
  for (RegExpGroup candidate : SyntaxTraverser.psiTraverser(file)
    .filter(RegExpGroup.class)
    .filter(RegExpGroup::isCapturing)) {
    position++;
    if (position == groupNumber) {
      return candidate;
    }
  }
  // fewer capturing groups than requested
  return null;
}
@Override
@Nullable
public String getGroupName() {
@@ -64,6 +90,11 @@ public class RegExpNamedGroupRefImpl extends RegExpElementImpl implements RegExp
return node.findChildByType(RUBY_GROUP_REF_TOKENS) != null;
}
/**
 * @return {@code true} if this element has a child token of type
 *         {@link RegExpTT#PCRE_NUMBERED_GROUP_REF}, i.e. it was opened as a numbered group reference
 */
@Override
public boolean isPcreNumberedGroupRef() {
  final ASTNode opener = getNode().findChildByType(RegExpTT.PCRE_NUMBERED_GROUP_REF);
  return opener != null;
}
@Override
public boolean isNamedGroupRef() {
return getNode().findChildByType(RegExpTT.RUBY_NAMED_GROUP_REF) != null;

View File

@@ -45,6 +45,7 @@ import static org.intellij.lang.regexp.RegExpCapability.*;
private boolean allowMysqlBracketExpressions;
private boolean allowPcreBackReferences;
private boolean allowPcreConditions;
private boolean allowPcreNumberedGroupRef;
private int maxOctal = 0777;
private int minOctalDigits = 1;
private boolean whitespaceInClass;
@@ -66,6 +67,7 @@ import static org.intellij.lang.regexp.RegExpCapability.*;
this.allowTransformationEscapes = capabilities.contains(TRANSFORMATION_ESCAPES);
this.allowMysqlBracketExpressions = capabilities.contains(MYSQL_BRACKET_EXPRESSIONS);
this.allowPcreBackReferences = capabilities.contains(PCRE_BACK_REFERENCES);
this.allowPcreNumberedGroupRef = capabilities.contains(PCRE_NUMBERED_GROUP_REF);
this.allowPcreConditions = capabilities.contains(PCRE_CONDITIONS);
if (capabilities.contains(MAX_OCTAL_177)) {
maxOctal = 0177;
@@ -118,6 +120,7 @@ import static org.intellij.lang.regexp.RegExpCapability.*;
%xstate NAMED_GROUP
%xstate QUOTED_NAMED_GROUP
%xstate PY_NAMED_GROUP_REF
%xstate PCRE_NUMBERED_GROUP
%xstate BRACKET_EXPRESSION
%xstate MYSQL_CHAR_EXPRESSION
%xstate MYSQL_CHAR_EQ_EXPRESSION
@@ -448,6 +451,12 @@ BACK_REFERENCES_GROUP = [1-9][0-9]{0,2}
"(?<" { yybegin(NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_NAMED_GROUP; }
"(?'" { yybegin(QUOTED_NAMED_GROUP); capturingGroupCount++; return RegExpTT.RUBY_QUOTED_NAMED_GROUP; }
"(?"[+-]? / [:digit:] { if (allowPcreNumberedGroupRef) {
yybegin(PCRE_NUMBERED_GROUP);
return RegExpTT.PCRE_NUMBERED_GROUP_REF;
}
else { yypushback(yylength() - 2); yybegin(OPTIONS); return RegExpTT.SET_OPTIONS; }}
"(?" { yybegin(OPTIONS); return RegExpTT.SET_OPTIONS; }
}
@@ -479,6 +488,11 @@ BACK_REFERENCES_GROUP = [1-9][0-9]{0,2}
{ANY} { yybegin(YYINITIAL); yypushback(1); }
}
/* State entered right after the opener of a PCRE numbered group reference ("(?" with an optional sign) */
<PCRE_NUMBERED_GROUP> {
/* the group number */
[:digit:]+ { return RegExpTT.NUMBER; }
/* anything else (presumably any single character, see ANY): push it back and let YYINITIAL lex it, e.g. the closing ")" */
{ANY} { yybegin(YYINITIAL); yypushback(1); }
}
<CONDITIONAL1> {
"(?=" { yybegin(YYINITIAL); return RegExpTT.POS_LOOKAHEAD; }
"(?!" { yybegin(YYINITIAL); return RegExpTT.NEG_LOOKAHEAD; }

View File

@@ -32,6 +32,7 @@ import com.intellij.psi.util.PsiTreeUtil;
import com.intellij.util.containers.ContainerUtil;
import org.intellij.lang.regexp.*;
import org.intellij.lang.regexp.psi.*;
import org.intellij.lang.regexp.psi.impl.RegExpNamedGroupRefImpl;
import org.intellij.lang.regexp.psi.impl.RegExpGroupImpl;
import org.jetbrains.annotations.Nls;
import org.jetbrains.annotations.NonNls;
@@ -294,7 +295,7 @@ public final class RegExpAnnotator extends RegExpElementVisitor implements Annot
.create();
return;
}
if (groupRef.getGroupName() == null) {
if (!groupRef.isPcreNumberedGroupRef() && groupRef.getGroupName() == null) {
return;
}
final RegExpGroup group = groupRef.resolve();
@@ -304,6 +305,13 @@ public final class RegExpAnnotator extends RegExpElementVisitor implements Annot
myHolder.newAnnotation(HighlightSeverity.ERROR, RegExpBundle.message("error.unresolved.named.group.reference")).range(node)
.highlightType(ProblemHighlightType.LIKE_UNKNOWN_SYMBOL).create();
}
else {
final ASTNode number = groupRef.getNode().findChildByType(RegExpTT.NUMBER);
if (number != null) {
myHolder.newAnnotation(HighlightSeverity.ERROR, RegExpBundle.message("error.unresolved.numbered.group.reference")).range(number)
.highlightType(ProblemHighlightType.LIKE_UNKNOWN_SYMBOL).create();
}
}
}
else if (PsiTreeUtil.isAncestor(group, groupRef, true)) {
myHolder.newAnnotation(HighlightSeverity.WARNING,

View File

@@ -757,6 +757,11 @@ public class RegExpLexerTest extends LexerTestCase {
"ESC_CHARACTER ('\\#')", lexer);
}
/**
 * Lexes a pattern containing the numbered group reference {@code (?1)} with the
 * {@code PCRE_NUMBERED_GROUP_REF} capability enabled.
 * The expected text is passed as {@code null}; presumably {@code doTest} then compares against the
 * stored expected-output file for this test - confirm against {@code LexerTestCase}.
 */
public void testNumberedGroupRef() {
  final RegExpLexer pcreLexer = new RegExpLexer(EnumSet.of(PCRE_NUMBERED_GROUP_REF));
  final String pattern = "(abcd)(?1)";
  doTest(pattern, null, pcreLexer);
}
@Override
protected Lexer createLexer() {
return null;

View File

@@ -0,0 +1,9 @@
GROUP_BEGIN ('(')
CHARACTER ('a')
CHARACTER ('b')
CHARACTER ('c')
CHARACTER ('d')
GROUP_END (')')
PCRE_NUMBERED_GROUP_REF ('(?')
NUMBER ('1')
GROUP_END (')')