From 51b7d597cf0f3f078eda5a0c0e4e5311639c88d1 Mon Sep 17 00:00:00 2001
From: Bas Leijdekkers <basleijdekkers@gmail.com>
Date: Mon, 23 Jan 2023 11:55:36 +0100
Subject: [PATCH] RegExp: don't fail on parsing if file ends after a high
 surrogate character (EA-352432)

GitOrigin-RevId: ed171446eafcb6419e200d4317ce8f141a667d7c
---
 .../src/org/intellij/lang/regexp/RegExpParser.java       | 9 +++++----
 .../test/org/intellij/lang/regexp/RegExpParsingTest.java | 3 ++-
 RegExpSupport/testData/psi/Escapes31.txt                 | 5 +++++
 3 files changed, 12 insertions(+), 5 deletions(-)
 create mode 100644 RegExpSupport/testData/psi/Escapes31.txt

diff --git a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java
index ebe670882222..1b70006eb38c 100644
--- a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java
+++ b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java
@@ -602,10 +602,11 @@ public class RegExpParser implements PsiParser, LightPsiParser {
       // merge surrogate pairs into single regexp char
       if (!Character.isSupplementaryCodePoint(value1) && Character.isHighSurrogate((char)value1)) {
         final String text2 = builder.getTokenText();
-        assert text2 != null;
-        final int value2 = RegExpCharImpl.unescapeChar(text2);
-        if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
-          builder.advanceLexer();
+        if (text2 != null) {
+          final int value2 = RegExpCharImpl.unescapeChar(text2);
+          if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) {
+            builder.advanceLexer();
+          }
         }
       }
       marker.done(RegExpElementTypes.CHAR);
diff --git a/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java b/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java
index 9d5b9ac49508..7d21bc7f65e8 100644
--- a/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java
+++ b/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java
@@ -1,4 +1,4 @@
-// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
+// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 package org.intellij.lang.regexp;
 
 import com.intellij.mock.MockSmartPointerManager;
@@ -227,6 +227,7 @@ public class RegExpParsingTest extends ParsingTestCase {
   public void testEscapes28() throws IOException { doCodeTest("[a\\]]"); }
   public void testEscapes29() throws IOException { doCodeTest("[^a\\]]"); }
   public void testEscapes30() throws IOException { doCodeTest("\\[\\]$"); }
+  public void testEscapes31() throws IOException { doCodeTest("\\ud800"); }
 
   public void testAnchors1() throws IOException { doCodeTest("^*"); }
   public void testAnchors2() throws IOException { doCodeTest("$*"); }
diff --git a/RegExpSupport/testData/psi/Escapes31.txt b/RegExpSupport/testData/psi/Escapes31.txt
new file mode 100644
index 000000000000..dc27dae97af5
--- /dev/null
+++ b/RegExpSupport/testData/psi/Escapes31.txt
@@ -0,0 +1,5 @@
+REGEXP_FILE
+  RegExpPatternImpl: <\ud800>
+    RegExpBranchImpl: <\ud800>
+      RegExpCharImpl: <\ud800>
+        PsiElement(UNICODE_CHAR)('\ud800')
\ No newline at end of file