RegExp: don't fail parsing when the file ends right after a high surrogate character (EA-352432)

GitOrigin-RevId: ed171446eafcb6419e200d4317ce8f141a667d7c
2025-12-14 09:12:22 +07:00 · 2023-01-23 11:55:36 +01:00
parent 4e777d7b67
commit 51b7d597cf
3 changed files with 12 additions and 5 deletions
--- a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java
+++ b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java
@@ -602,10 +602,11 @@ public class RegExpParser implements PsiParser, LightPsiParser {
      // merge surrogate pairs into single regexp char
      if (!Character.isSupplementaryCodePoint(value1) && Character.isHighSurrogate((char)value1)) {
        final String text2 = builder.getTokenText();
-        assert text2 != null;
-        final int value2 = RegExpCharImpl.unescapeChar(text2);
-        if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
-          builder.advanceLexer();
+        if (text2 != null) {
+          final int value2 = RegExpCharImpl.unescapeChar(text2);
+          if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
+            builder.advanceLexer();
+          }
        }
      }
      marker.done(RegExpElementTypes.CHAR);
--- a/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java
+++ b/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java
@@ -1,4 +1,4 @@
-// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
+// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 package org.intellij.lang.regexp;

 import com.intellij.mock.MockSmartPointerManager;
@@ -227,6 +227,7 @@ public class RegExpParsingTest extends ParsingTestCase {
  public void testEscapes28() throws IOException { doCodeTest("[a\\]]"); }
  public void testEscapes29() throws IOException { doCodeTest("[^a\\]]"); }
  public void testEscapes30() throws IOException { doCodeTest("\\[\\]$"); }
+  public void testEscapes31() throws IOException { doCodeTest("\\ud800"); }

  public void testAnchors1() throws IOException { doCodeTest("^*"); }
  public void testAnchors2() throws IOException { doCodeTest("$*"); }
--- a/RegExpSupport/testData/psi/Escapes31.txt
+++ b/RegExpSupport/testData/psi/Escapes31.txt
@@ -0,0 +1,5 @@
+REGEXP_FILE
+  RegExpPatternImpl: <\ud800>
+    RegExpBranchImpl: <\ud800>
+      RegExpCharImpl: <\ud800>
+        PsiElement(UNICODE_CHAR)('\ud800')