RegExp: don't fail on parsing if file ends after a high surrogate character (EA-352432)

GitOrigin-RevId: ed171446eafcb6419e200d4317ce8f141a667d7c
This commit is contained in:
Bas Leijdekkers
2023-01-23 11:55:36 +01:00
committed by intellij-monorepo-bot
parent 4e777d7b67
commit 51b7d597cf
3 changed files with 12 additions and 5 deletions

View File

@@ -602,10 +602,11 @@ public class RegExpParser implements PsiParser, LightPsiParser {
// merge surrogate pairs into single regexp char
if (!Character.isSupplementaryCodePoint(value1) && Character.isHighSurrogate((char)value1)) {
final String text2 = builder.getTokenText();
-assert text2 != null;
-final int value2 = RegExpCharImpl.unescapeChar(text2);
-if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
-builder.advanceLexer();
+if (text2 != null) {
+final int value2 = RegExpCharImpl.unescapeChar(text2);
+if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
+builder.advanceLexer();
+}
}
}
marker.done(RegExpElementTypes.CHAR);

View File

@@ -1,4 +1,4 @@
-// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
+// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package org.intellij.lang.regexp;
import com.intellij.mock.MockSmartPointerManager;
@@ -227,6 +227,7 @@ public class RegExpParsingTest extends ParsingTestCase {
public void testEscapes28() throws IOException { doCodeTest("[a\\]]"); }
public void testEscapes29() throws IOException { doCodeTest("[^a\\]]"); }
public void testEscapes30() throws IOException { doCodeTest("\\[\\]$"); }
+public void testEscapes31() throws IOException { doCodeTest("\\ud800"); }
public void testAnchors1() throws IOException { doCodeTest("^*"); }
public void testAnchors2() throws IOException { doCodeTest("$*"); }

View File

@@ -0,0 +1,5 @@
+REGEXP_FILE
+  RegExpPatternImpl: <\ud800>
+    RegExpBranchImpl: <\ud800>
+      RegExpCharImpl: <\ud800>
+        PsiElement(UNICODE_CHAR)('\ud800')