diff --git a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java index 138186863224..6c6323e6451e 100644 --- a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java +++ b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java @@ -21,6 +21,7 @@ import com.intellij.lang.PsiBuilder; import com.intellij.lang.PsiParser; import com.intellij.psi.tree.IElementType; import com.intellij.psi.tree.TokenSet; +import org.intellij.lang.regexp.psi.impl.RegExpCharImpl; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -526,6 +527,22 @@ public class RegExpParser implements PsiParser, LightPsiParser { checkMatches(builder, RegExpTT.RBRACE, "'}' expected"); marker.done(RegExpElementTypes.NAMED_CHARACTER); } + else if (builder.getTokenType() == RegExpTT.UNICODE_CHAR) { + final String text1 = builder.getTokenText(); + assert text1 != null; + final int value1 = RegExpCharImpl.unescapeChar(text1); + builder.advanceLexer(); + // merge surrogate pairs into single regexp char + if (!Character.isSupplementaryCodePoint(value1) && Character.isHighSurrogate((char)value1)) { + final String text2 = builder.getTokenText(); + assert text2 != null; + final int value2 = RegExpCharImpl.unescapeChar(text2); + if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) { + builder.advanceLexer(); + } + } + marker.done(RegExpElementTypes.CHAR); + } else { builder.advanceLexer(); marker.done(RegExpElementTypes.CHAR); diff --git a/RegExpSupport/src/org/intellij/lang/regexp/psi/impl/RegExpCharImpl.java b/RegExpSupport/src/org/intellij/lang/regexp/psi/impl/RegExpCharImpl.java index f2472b1f2208..984db72ad04d 100644 --- a/RegExpSupport/src/org/intellij/lang/regexp/psi/impl/RegExpCharImpl.java +++ b/RegExpSupport/src/org/intellij/lang/regexp/psi/impl/RegExpCharImpl.java @@ -57,12 +57,26 @@ public class RegExpCharImpl extends RegExpElementImpl implements RegExpChar { @Override public int getValue() { - final String s = getUnescapedText(); - if (s.equals("\\") && getType() == Type.CHAR) return '\\'; - return unescapeChar(s); + final ASTNode node = getNode(); + final IElementType type = node.getFirstChildNode().getElementType(); + if (type == RegExpTT.BAD_OCT_VALUE || + type == RegExpTT.BAD_HEX_VALUE || + type == RegExpTT.BAD_CHARACTER || + type == StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN) { + return -1; + } + final String text = getUnescapedText(); + if (text.length() == 1 && type == RegExpTT.CHARACTER) { + return text.codePointAt(0); + } + else if (type == RegExpTT.UNICODE_CHAR) { + final int i = text.indexOf('\\', 1); + if (i >= 0) return Character.toCodePoint((char)unescapeChar(text.substring(0, i)), (char)unescapeChar(text.substring(i))); + } + return unescapeChar(text); } - private static int unescapeChar(String s) { + public static int unescapeChar(String s) { final int length = s.length(); assert length > 0; diff --git a/RegExpSupport/src/org/intellij/lang/regexp/validation/RegExpAnnotator.java b/RegExpSupport/src/org/intellij/lang/regexp/validation/RegExpAnnotator.java index 66b8046e41f9..d6a2c045c84b 100644 --- a/RegExpSupport/src/org/intellij/lang/regexp/validation/RegExpAnnotator.java +++ b/RegExpSupport/src/org/intellij/lang/regexp/validation/RegExpAnnotator.java @@ -91,39 +91,16 @@ public final class RegExpAnnotator extends RegExpElementVisitor implements Annot if (to == null) { return; } - int fromCodePoint = from.getValue(); - int toCodePoint = to.getValue(); + final int fromCodePoint = from.getValue(); + final int toCodePoint = to.getValue(); if (fromCodePoint == -1 || toCodePoint == -1) { return; } - int errorStart = range.getTextOffset(); - int errorEnd = errorStart + range.getTextLength(); - // \ud800\udc00-\udbff\udfff - if (!Character.isSupplementaryCodePoint(fromCodePoint) && Character.isLowSurrogate((char)fromCodePoint)) { - final PsiElement prevSibling = range.getPrevSibling(); - if (prevSibling instanceof RegExpChar) { - final int prevSiblingValue = ((RegExpChar)prevSibling).getValue(); - if (!Character.isSupplementaryCodePoint(prevSiblingValue) && Character.isHighSurrogate((char)prevSiblingValue)) { - fromCodePoint = Character.toCodePoint((char)prevSiblingValue, (char)fromCodePoint); - errorStart -= prevSibling.getTextLength(); - } - } - } - if (!Character.isSupplementaryCodePoint(toCodePoint) && Character.isHighSurrogate((char)toCodePoint)) { - final PsiElement nextSibling = range.getNextSibling(); - if (nextSibling instanceof RegExpChar) { - final int nextSiblingValue = ((RegExpChar)nextSibling).getValue(); - if (!Character.isSupplementaryCodePoint(nextSiblingValue) && Character.isLowSurrogate((char)nextSiblingValue)) { - toCodePoint = Character.toCodePoint((char)toCodePoint, (char)nextSiblingValue); - errorEnd += nextSibling.getTextLength(); - } - } - } if (toCodePoint < fromCodePoint) { - myHolder.createErrorAnnotation(new TextRange(errorStart, errorEnd), "Illegal character range (to < from)"); + myHolder.newAnnotation(HighlightSeverity.ERROR, "Illegal character range (to < from)").range(range).create(); } else if (toCodePoint == fromCodePoint) { - myHolder.createWarningAnnotation(new TextRange(errorStart, errorEnd), "Redundant character range"); + myHolder.newAnnotation(HighlightSeverity.WARNING, "Redundant character range").range(range).create(); } } diff --git a/RegExpSupport/testData/psi/Charclasses64.txt b/RegExpSupport/testData/psi/Charclasses64.txt index b3a14f509b7d..db36e4e6cae3 100644 --- a/RegExpSupport/testData/psi/Charclasses64.txt +++ b/RegExpSupport/testData/psi/Charclasses64.txt @@ -3,14 +3,12 @@ REGEXP_FILE RegExpBranchImpl: <[\ud800\udc00-\udbff\udfff]> RegExpClassImpl: <[\ud800\udc00-\udbff\udfff]> PsiElement(CLASS_BEGIN)('[') - RegExpCharImpl: <\ud800> - PsiElement(UNICODE_CHAR)('\ud800') - RegExpCharRangeImpl: <\udc00-\udbff> - RegExpCharImpl: <\udc00> + RegExpCharRangeImpl: <\ud800\udc00-\udbff\udfff> + RegExpCharImpl: <\ud800\udc00> + PsiElement(UNICODE_CHAR)('\ud800') PsiElement(UNICODE_CHAR)('\udc00') PsiElement(MINUS)('-') - RegExpCharImpl: <\udbff> + RegExpCharImpl: <\udbff\udfff> PsiElement(UNICODE_CHAR)('\udbff') - RegExpCharImpl: <\udfff> - PsiElement(UNICODE_CHAR)('\udfff') + PsiElement(UNICODE_CHAR)('\udfff') PsiElement(CLASS_END)(']') \ No newline at end of file diff --git a/RegExpSupport/testData/psi/Escapes16.txt b/RegExpSupport/testData/psi/Escapes16.txt index a9b7d00fb6b6..d8120dabe04c 100644 --- a/RegExpSupport/testData/psi/Escapes16.txt +++ b/RegExpSupport/testData/psi/Escapes16.txt @@ -3,14 +3,12 @@ REGEXP_FILE RegExpBranchImpl: <[\udbff\udfff-\ud800\udc00]> RegExpClassImpl: <[\udbff\udfff-\ud800\udc00]> PsiElement(CLASS_BEGIN)('[') - RegExpCharImpl: <\udbff> - PsiElement(UNICODE_CHAR)('\udbff') - RegExpCharRangeImpl: <\udfff-\ud800> - RegExpCharImpl: <\udfff> + RegExpCharRangeImpl: <\udbff\udfff-\ud800\udc00> + RegExpCharImpl: <\udbff\udfff> + PsiElement(UNICODE_CHAR)('\udbff') PsiElement(UNICODE_CHAR)('\udfff') PsiElement(MINUS)('-') - RegExpCharImpl: <\ud800> + RegExpCharImpl: <\ud800\udc00> PsiElement(UNICODE_CHAR)('\ud800') - RegExpCharImpl: <\udc00> - PsiElement(UNICODE_CHAR)('\udc00') + PsiElement(UNICODE_CHAR)('\udc00') PsiElement(CLASS_END)(']') \ No newline at end of file diff --git a/RegExpSupport/testData/psi/Escapes17.txt b/RegExpSupport/testData/psi/Escapes17.txt index b3a14f509b7d..db36e4e6cae3 100644 --- a/RegExpSupport/testData/psi/Escapes17.txt +++ b/RegExpSupport/testData/psi/Escapes17.txt @@ -3,14 +3,12 @@ REGEXP_FILE RegExpBranchImpl: <[\ud800\udc00-\udbff\udfff]> RegExpClassImpl: <[\ud800\udc00-\udbff\udfff]> PsiElement(CLASS_BEGIN)('[') - RegExpCharImpl: <\ud800> - PsiElement(UNICODE_CHAR)('\ud800') - RegExpCharRangeImpl: <\udc00-\udbff> - RegExpCharImpl: <\udc00> + RegExpCharRangeImpl: <\ud800\udc00-\udbff\udfff> + RegExpCharImpl: <\ud800\udc00> + PsiElement(UNICODE_CHAR)('\ud800') PsiElement(UNICODE_CHAR)('\udc00') PsiElement(MINUS)('-') - RegExpCharImpl: <\udbff> + RegExpCharImpl: <\udbff\udfff> PsiElement(UNICODE_CHAR)('\udbff') - RegExpCharImpl: <\udfff> - PsiElement(UNICODE_CHAR)('\udfff') + PsiElement(UNICODE_CHAR)('\udfff') PsiElement(CLASS_END)(']') \ No newline at end of file