PY-63393 Handle keywords terminating lexing of f-string fragments in the lowermost JFlex lexer

Previously, we acknowledged them in PythonIndentingProcessor.adjustBraceLevel, inserting
synthetic STATEMENT_BREAK in front of them to stop recovery in the parser, similarly
to how we handle other kinds of incomplete brackets, but the state inside PyLexerFStringHelper
was not reset, so it kept trying to find matching closing brackets and quotes, and interpreting
colons as PyTokenTypes.FSTRING_FRAGMENT_FORMAT_START instead of just PyTokenTypes.COLON.
The state in PyLexerFStringHelper and PythonIndentingProcessor became out of sync, which
led to assertion violations.

It's not an optimal solution, since now these tokens are listed both in
PythonTokenSetContributor.getUnbalancedBracesRecoveryTokens and in the Python.flex lexer
specification, and we need to keep them in sync. Also, PythonTokenSetContributor
can provide additional tokens from other languages, such as Cython. But it's simple
and seems "good enough" to patch the problem in the release.

GitOrigin-RevId: 4e156314cc02aba0634d5d9e3008177f49105051
This commit is contained in:
Mikhail Golubev
2023-10-05 15:12:50 +03:00
committed by intellij-monorepo-bot
parent 2e8e817447
commit 10808546fc
8 changed files with 704 additions and 514 deletions

View File

@@ -78,6 +78,7 @@ public class PythonTokenSetContributor extends PythonDialectsTokenSetContributor
/**
 * Returns the keyword tokens used for recovery from unbalanced braces.
 * <p>
 * Sync these tokens with Python.flex under the {@code <FSTRING_FRAGMENT>} state: the same
 * keywords are listed there explicitly, so a change here needs a matching change there.
 */
@NotNull
@Override
public TokenSet getUnbalancedBracesRecoveryTokens() {
  return TokenSet.create(
    DEF_KEYWORD,
    CLASS_KEYWORD,
    RETURN_KEYWORD,
    WITH_KEYWORD,
    WHILE_KEYWORD,
    BREAK_KEYWORD,
    CONTINUE_KEYWORD,
    RAISE_KEYWORD,
    TRY_KEYWORD,
    EXCEPT_KEYWORD,
    FINALLY_KEYWORD
  );
}

View File

@@ -163,7 +163,9 @@ class PyLexerFStringHelper(private val myLexer: FlexLexerEx) {
/**
 * Discards all tracked f-string states, starting from the outermost one (index 0).
 *
 * The states are dropped through [dropFStringStateWithAllNested] rather than by clearing
 * [myFStringStates] directly. Clearing the collection first would make the emptiness check
 * below always fail, so the drop logic would never run.
 */
fun reset() {
  // LexerEditorHighlighter always resets the lexer state to YYINITIAL, where there can't be any
  // f-strings, but reset() is also called from lexer rules while an f-string fragment is still open
  // (see the keyword rules in Python.flex), so a plain clear() is not enough here.
  // NOTE(review): assumes dropFStringStateWithAllNested also restores the matching lexer state - confirm.
  if (myFStringStates.isNotEmpty()) {
    dropFStringStateWithAllNested(0)
  }
}
fun getTextTokenType(): IElementType {

View File

@@ -106,12 +106,11 @@ FSTRING_FRAGMENT_TYPE_CONVERSION = "!" [^=:'\"} \t\r\n]*
// F-string helper bound to this lexer instance; lexer rules call fStringHelper.reset() on it.
private final PyLexerFStringHelper fStringHelper = new PyLexerFStringHelper(this);
/**
 * Returns the difference between the length of the current match ({@code yylength()}) and the
 * length of {@code string} after stripping, in this order, a trailing backslash, a trailing
 * semicolon and trailing whitespace.
 *
 * @param string text to measure; NOTE(review): presumably the text of the current match - confirm
 * @return {@code yylength()} minus the length of the stripped text
 */
private int getSpaceLength(CharSequence string) {
  // The order matters: the backslash and semicolon are only removed when they are the very last
  // character, and whitespace is trimmed after that.
  String trimmed = StringUtil.trimEnd(string.toString(), "\\");
  trimmed = StringUtil.trimEnd(trimmed, ";");
  trimmed = StringUtil.trimTrailing(trimmed);
  return yylength() - trimmed.length();
}
%}
@@ -150,6 +149,19 @@ return yylength()-s.length();
// Should be impossible inside expression fragments: any openingQuotes should be matched as a string literal there
// {FSTRING_QUOTES} { return hasMatchingFStringStart(yytext().toString()) ? PyTokenTypes.FSTRING_END : PyTokenTypes.FSTRING_TEXT; }
// Statement-only keywords matched explicitly in this lexer state (<FSTRING_FRAGMENT>, per the
// matching note in PythonTokenSetContributor). Each rule calls fStringHelper.reset() before
// returning the regular keyword token, so f-string tracking is dropped as soon as one is seen.
// Sync these tokens with PythonTokenSetContributor.getUnbalancedBracesRecoveryTokens
"def" { fStringHelper.reset(); return PyTokenTypes.DEF_KEYWORD; }
"class" { fStringHelper.reset(); return PyTokenTypes.CLASS_KEYWORD; }
"return" { fStringHelper.reset(); return PyTokenTypes.RETURN_KEYWORD; }
"with" { fStringHelper.reset(); return PyTokenTypes.WITH_KEYWORD; }
"while" { fStringHelper.reset(); return PyTokenTypes.WHILE_KEYWORD; }
"break" { fStringHelper.reset(); return PyTokenTypes.BREAK_KEYWORD; }
"continue" { fStringHelper.reset(); return PyTokenTypes.CONTINUE_KEYWORD; }
"raise" { fStringHelper.reset(); return PyTokenTypes.RAISE_KEYWORD; }
"try" { fStringHelper.reset(); return PyTokenTypes.TRY_KEYWORD; }
"except" { fStringHelper.reset(); return PyTokenTypes.EXCEPT_KEYWORD; }
"finally" { fStringHelper.reset(); return PyTokenTypes.FINALLY_KEYWORD; }
}
<FSTRING_FRAGMENT_FORMAT> {

View File

@@ -0,0 +1,2 @@
s = f'{
raise:foo}'

View File

@@ -0,0 +1,37 @@
PyFile:CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.py
PyAssignmentStatement
PyTargetExpression: s
PsiElement(Py:IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(Py:EQ)('=')
PsiWhiteSpace(' ')
PyStringLiteralExpression: {
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('f'')
PyFStringFragment
PsiElement(Py:FSTRING_FRAGMENT_START)('{')
PsiErrorElement:Expression expected
PsiWhiteSpace('\n')
PsiErrorElement:Type conversion, ':' or '}' expected
<empty list>
PsiErrorElement:' expected
<empty list>
PyRaiseStatement
PsiElement(Py:RAISE_KEYWORD)('raise')
PsiErrorElement:End of statement expected
<empty list>
PsiElement(Py:COLON)(':')
PsiErrorElement:Statement expected, found Py:COLON
<empty list>
PyExpressionStatement
PyReferenceExpression: foo
PsiElement(Py:IDENTIFIER)('foo')
PsiErrorElement:End of statement expected
<empty list>
PsiElement(Py:RBRACE)('}')
PsiErrorElement:Statement expected, found Py:RBRACE
<empty list>
PyExpressionStatement
PyStringLiteralExpression:
PsiElement(Py:SINGLE_QUOTED_STRING)(''')

View File

@@ -520,6 +520,16 @@ public class PythonLexerTest extends PyLexerTestCase {
"BAD_CHARACTER", "Py:IMPORT_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
// PY-63393
// The input opens an f-string fragment with '{' and continues on the next line with the
// statement-only keyword `raise`. The expected stream has a STATEMENT_BREAK before RAISE_KEYWORD,
// the ':' after it is a plain COLON rather than an f-string format start, and the trailing
// '}' and quote come out as RBRACE and SINGLE_QUOTED_STRING.
public void testFStringFragmentContainingStatementOnlyRecoveryKeyword() {
doTest("""
s = f'{
raise:foo}'""",
"Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:RAISE_KEYWORD", "Py:COLON", "Py:IDENTIFIER", "Py:RBRACE", "Py:SINGLE_QUOTED_STRING",
"Py:STATEMENT_BREAK");
}
/**
 * Lexes {@code text} with a fresh {@link PythonIndentingLexer} and checks the produced tokens
 * against {@code expectedTokens} via {@code PyLexerTestCase.doLexerTest}.
 */
private static void doTest(String text, String... expectedTokens) {
  final PythonIndentingLexer indentingLexer = new PythonIndentingLexer();
  PyLexerTestCase.doLexerTest(text, indentingLexer, expectedTokens);
}

View File

@@ -865,6 +865,11 @@ public class PythonParsingTest extends ParsingTestCase {
doTest(LanguageLevel.PYTHON36);
}
// PY-63393
// Runs the parsing test at the Python 3.6 language level.
// NOTE(review): presumably the fixture is resolved from the test name
// (CompleteFStringFragmentTerminatedAtStatementOnlyKeyword.py) - confirm against doTest.
public void testCompleteFStringFragmentTerminatedAtStatementOnlyKeyword() {
doTest(LanguageLevel.PYTHON36);
}
// Runs the parsing test at the Python 3.6 language level.
public void testNestedIncompleteFStringFragmentRecoveryStoppedAtStatementOnlyKeyword() {
doTest(LanguageLevel.PYTHON36);
}
@@ -1315,7 +1320,7 @@ public class PythonParsingTest extends ParsingTestCase {
// Runs the parsing test at the Python 3.12 language level.
public void testTypeKeywordAsIdentifier() {
doTest(LanguageLevel.PYTHON312);
}
// No-argument overload: delegates to doTest(LanguageLevel) with LanguageLevel.PYTHON26.
public void doTest() {
doTest(LanguageLevel.PYTHON26);
}