PY-63393 Handle keywords terminating lexing of f-string fragments in the lowermost JFlex lexer

Previously, we acknowledged them in PythonIndentingProcessor.adjustBraceLevel, inserting a synthetic STATEMENT_BREAK in front of them to stop recovery in the parser, similarly to how we handle other kinds of incomplete brackets, but the state inside PyLexerFStringHelper was not reset, so it kept trying to find matching closing brackets and quotes, and kept interpreting colons as PyTokenTypes.FSTRING_FRAGMENT_FORMAT_START instead of just PyTokenTypes.COLON. The state in PyLexerFStringHelper and PythonIndentingProcessor became out of sync, which led to assertion violations. This fix is not an optimal solution, since now these tokens are listed both in PythonTokenSetContributor.getUnbalancedBracesRecoveryTokens and in the Python.flex lexer specification, and we need to keep them in sync. Also, PythonTokenSetContributor can provide additional tokens from other languages, such as Cython. But it's simple and seems "good enough" to patch the problem in the release. GitOrigin-RevId: 4e156314cc02aba0634d5d9e3008177f49105051
2026-01-06 03:21:12 +07:00 · 2023-10-05 15:12:50 +03:00
parent 2e8e817447
commit 10808546fc
8 changed files with 704 additions and 514 deletions
--- a/python/python-psi-impl/gen/com/jetbrains/python/lexer/_PythonLexer.java
+++ b/python/python-psi-impl/gen/com/jetbrains/python/lexer/_PythonLexer.java
--- a/python/python-psi-impl/src/com/jetbrains/python/PythonTokenSetContributor.java
+++ b/python/python-psi-impl/src/com/jetbrains/python/PythonTokenSetContributor.java
@@ -78,6 +78,7 @@ public class PythonTokenSetContributor extends PythonDialectsTokenSetContributor
  @NotNull
  @Override
  public TokenSet getUnbalancedBracesRecoveryTokens() {
+    // Sync these tokens with Python.flex under the <FSTRING_FRAGMENT> state
    return TokenSet.create(DEF_KEYWORD, CLASS_KEYWORD, RETURN_KEYWORD, WITH_KEYWORD, WHILE_KEYWORD, BREAK_KEYWORD, CONTINUE_KEYWORD,
                           RAISE_KEYWORD, TRY_KEYWORD, EXCEPT_KEYWORD, FINALLY_KEYWORD);
  }
--- a/python/python-psi-impl/src/com/jetbrains/python/lexer/PyLexerFStringHelper.kt
+++ b/python/python-psi-impl/src/com/jetbrains/python/lexer/PyLexerFStringHelper.kt
@@ -163,7 +163,9 @@ class PyLexerFStringHelper(private val myLexer: FlexLexerEx) {
  fun reset() {
    // There is no need to be smarter about it, since LexerEditorHighlighter always resets 
    // the lexer state to YYINITIAL where there can't be any f-strings.
-    myFStringStates.clear()
+    if (myFStringStates.isNotEmpty()) {
+      dropFStringStateWithAllNested(0)
+    }
  }

  fun getTextTokenType(): IElementType {
--- a/python/python-psi-impl/src/com/jetbrains/python/lexer/Python.flex
+++ b/python/python-psi-impl/src/com/jetbrains/python/lexer/Python.flex
@@ -106,12 +106,11 @@ FSTRING_FRAGMENT_TYPE_CONVERSION = "!" [^=:'\"} \t\r\n]*
 private final PyLexerFStringHelper fStringHelper = new PyLexerFStringHelper(this);

 private int getSpaceLength(CharSequence string) {
-String string1 = string.toString();
-string1 = StringUtil.trimEnd(string1, "\\");
-string1 = StringUtil.trimEnd(string1, ";");
-final String s = StringUtil.trimTrailing(string1);
-return yylength()-s.length();
-
+  String string1 = string.toString();
+  string1 = StringUtil.trimEnd(string1, "\\");
+  string1 = StringUtil.trimEnd(string1, ";");
+  final String s = StringUtil.trimTrailing(string1);
+  return yylength() - s.length();
 }
 %}

@@ -150,6 +149,19 @@ return yylength()-s.length();

  // Should be impossible inside expression fragments: any openingQuotes should be matched as a string literal there
  // {FSTRING_QUOTES} { return hasMatchingFStringStart(yytext().toString()) ? PyTokenTypes.FSTRING_END : PyTokenTypes.FSTRING_TEXT; }
+  
+  // Sync these tokens with PythonTokenSetContributor.getUnbalancedBracesRecoveryTokens
+  "def"           { fStringHelper.reset(); return PyTokenTypes.DEF_KEYWORD; }
+  "class"         { fStringHelper.reset(); return PyTokenTypes.CLASS_KEYWORD; } 
+  "return"        { fStringHelper.reset(); return PyTokenTypes.RETURN_KEYWORD; }
+  "with"          { fStringHelper.reset(); return PyTokenTypes.WITH_KEYWORD; } 
+  "while"         { fStringHelper.reset(); return PyTokenTypes.WHILE_KEYWORD; } 
+  "break"         { fStringHelper.reset(); return PyTokenTypes.BREAK_KEYWORD; } 
+  "continue"      { fStringHelper.reset(); return PyTokenTypes.CONTINUE_KEYWORD; } 
+  "raise"         { fStringHelper.reset(); return PyTokenTypes.RAISE_KEYWORD; } 
+  "try"           { fStringHelper.reset(); return PyTokenTypes.TRY_KEYWORD; } 
+  "except"        { fStringHelper.reset(); return PyTokenTypes.EXCEPT_KEYWORD; }
+  "finally"       { fStringHelper.reset(); return PyTokenTypes.FINALLY_KEYWORD; } 
 }

 <FSTRING_FRAGMENT_FORMAT> {
--- a/python/testData/psi/CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.py
+++ b/python/testData/psi/CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.py
@@ -0,0 +1,2 @@
+s = f'{
+raise:foo}'
--- a/python/testData/psi/CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.txt
+++ b/python/testData/psi/CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.txt
@@ -0,0 +1,37 @@
+PyFile:CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.py
+  PyAssignmentStatement
+    PyTargetExpression: s
+      PsiElement(Py:IDENTIFIER)('s')
+    PsiWhiteSpace(' ')
+    PsiElement(Py:EQ)('=')
+    PsiWhiteSpace(' ')
+    PyStringLiteralExpression: {
+
+      PyFormattedStringElement
+        PsiElement(Py:FSTRING_START)('f'')
+        PyFStringFragment
+          PsiElement(Py:FSTRING_FRAGMENT_START)('{')
+          PsiErrorElement:Expression expected
+            PsiWhiteSpace('\n')
+          PsiErrorElement:Type conversion, ':' or '}' expected
+            <empty list>
+        PsiErrorElement:' expected
+          <empty list>
+  PyRaiseStatement
+    PsiElement(Py:RAISE_KEYWORD)('raise')
+    PsiErrorElement:End of statement expected
+      <empty list>
+  PsiElement(Py:COLON)(':')
+  PsiErrorElement:Statement expected, found Py:COLON
+    <empty list>
+  PyExpressionStatement
+    PyReferenceExpression: foo
+      PsiElement(Py:IDENTIFIER)('foo')
+    PsiErrorElement:End of statement expected
+      <empty list>
+  PsiElement(Py:RBRACE)('}')
+  PsiErrorElement:Statement expected, found Py:RBRACE
+    <empty list>
+  PyExpressionStatement
+    PyStringLiteralExpression: 
+      PsiElement(Py:SINGLE_QUOTED_STRING)(''')
--- a/python/testSrc/com/jetbrains/python/PythonLexerTest.java
+++ b/python/testSrc/com/jetbrains/python/PythonLexerTest.java
@@ -520,6 +520,16 @@ public class PythonLexerTest extends PyLexerTestCase {
           "BAD_CHARACTER", "Py:IMPORT_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
  }

+  // PY-63393
+  public void testFStringFragmentContainingStatementOnlyRecoveryKeyword() {
+    doTest("""
+             s = f'{
+             raise:foo}'""",
+           "Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START", "Py:LINE_BREAK",
+           "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:RAISE_KEYWORD", "Py:COLON", "Py:IDENTIFIER", "Py:RBRACE", "Py:SINGLE_QUOTED_STRING",
+           "Py:STATEMENT_BREAK");
+  }
+
  private static void doTest(String text, String... expectedTokens) {
    PyLexerTestCase.doLexerTest(text, new PythonIndentingLexer(), expectedTokens);
  }
--- a/python/testSrc/com/jetbrains/python/parsing/PythonParsingTest.java
+++ b/python/testSrc/com/jetbrains/python/parsing/PythonParsingTest.java
@@ -865,6 +865,11 @@ public class PythonParsingTest extends ParsingTestCase {
    doTest(LanguageLevel.PYTHON36);
  }

+  // PY-63393
+  public void testCompleteFStringFragmentTerminatedAtStatementOnlyKeyword() {
+    doTest(LanguageLevel.PYTHON36);
+  }
+
  public void testNestedIncompleteFStringFragmentRecoveryStoppedAtStatementOnlyKeyword() {
    doTest(LanguageLevel.PYTHON36);
  }
@@ -1315,7 +1320,7 @@ public class PythonParsingTest extends ParsingTestCase {
  public void testTypeKeywordAsIdentifier() {
    doTest(LanguageLevel.PYTHON312);
  }
-
+  
  public void doTest() {
    doTest(LanguageLevel.PYTHON26);
  }