From 51b7d597cf0f3f078eda5a0c0e4e5311639c88d1 Mon Sep 17 00:00:00 2001 From: Bas Leijdekkers Date: Mon, 23 Jan 2023 11:55:36 +0100 Subject: [PATCH] RegExp: don't fail on parsing if file ends after a high surrogate character (EA-352432) GitOrigin-RevId: ed171446eafcb6419e200d4317ce8f141a667d7c --- .../src/org/intellij/lang/regexp/RegExpParser.java | 9 +++++---- .../test/org/intellij/lang/regexp/RegExpParsingTest.java | 3 ++- RegExpSupport/testData/psi/Escapes31.txt | 5 +++++ 3 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 RegExpSupport/testData/psi/Escapes31.txt diff --git a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java index ebe670882222..1b70006eb38c 100644 --- a/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java +++ b/RegExpSupport/src/org/intellij/lang/regexp/RegExpParser.java @@ -602,10 +602,11 @@ public class RegExpParser implements PsiParser, LightPsiParser { // merge surrogate pairs into single regexp char if (!Character.isSupplementaryCodePoint(value1) && Character.isHighSurrogate((char)value1)) { final String text2 = builder.getTokenText(); - assert text2 != null; - final int value2 = RegExpCharImpl.unescapeChar(text2); - if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) { - builder.advanceLexer(); + if (text2 != null) { + final int value2 = RegExpCharImpl.unescapeChar(text2); + if (!Character.isSupplementaryCodePoint(value2) && Character.isLowSurrogate((char)value2)) { + builder.advanceLexer(); + } } } marker.done(RegExpElementTypes.CHAR); diff --git a/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java b/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java index 9d5b9ac49508..7d21bc7f65e8 100644 --- a/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java +++ b/RegExpSupport/test/org/intellij/lang/regexp/RegExpParsingTest.java @@ -1,4 +1,4 @@ -// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package org.intellij.lang.regexp; import com.intellij.mock.MockSmartPointerManager; @@ -227,6 +227,7 @@ public class RegExpParsingTest extends ParsingTestCase { public void testEscapes28() throws IOException { doCodeTest("[a\\]]"); } public void testEscapes29() throws IOException { doCodeTest("[^a\\]]"); } public void testEscapes30() throws IOException { doCodeTest("\\[\\]$"); } + public void testEscapes31() throws IOException { doCodeTest("\\ud800"); } public void testAnchors1() throws IOException { doCodeTest("^*"); } public void testAnchors2() throws IOException { doCodeTest("$*"); } diff --git a/RegExpSupport/testData/psi/Escapes31.txt b/RegExpSupport/testData/psi/Escapes31.txt new file mode 100644 index 000000000000..dc27dae97af5 --- /dev/null +++ b/RegExpSupport/testData/psi/Escapes31.txt @@ -0,0 +1,5 @@ +REGEXP_FILE + RegExpPatternImpl: <\ud800> + RegExpBranchImpl: <\ud800> + RegExpCharImpl: <\ud800> + PsiElement(UNICODE_CHAR)('\ud800') \ No newline at end of file