PY-32123 Ignore escape sequences in raw f-strings by adding special token type for their text

GitOrigin-RevId: 0b15201c60ac56daa45f22bb5ff3c1f8836efee3
This commit is contained in:
Mikhail Golubev
2020-01-06 20:55:04 +03:00
committed by intellij-monorepo-bot
parent 0ca3b164b0
commit be2d55e603
19 changed files with 192 additions and 31 deletions

View File

@@ -181,6 +181,7 @@ public class PyTokenTypes {
public static final PyElementType DEDENT = new PyElementType("DEDENT");
// Chunk of literal text inside an f-string whose prefix is not raw.
public static final PyElementType FSTRING_TEXT = new PyElementType("FSTRING_TEXT");
// Chunk of literal text inside an f-string whose prefix is raw (e.g. rf'...' or fr'...');
// kept as a separate type so that consumers can tell the two kinds of text apart.
public static final PyElementType FSTRING_RAW_TEXT = new PyElementType("FSTRING_RAW_TEXT");
public static final PyElementType FSTRING_START = new PyElementType("FSTRING_START");
public static final PyElementType FSTRING_END = new PyElementType("FSTRING_END");
public static final PyElementType FSTRING_FRAGMENT_START = new PyElementType("FSTRING_FRAGMENT_START");
@@ -195,4 +196,6 @@ public class PyTokenTypes {
FSTRING_FRAGMENT_END,
FSTRING_FRAGMENT_FORMAT_START,
FSTRING_FRAGMENT_TYPE_CONVERSION);
/** Both token types used for literal f-string text: the plain and the raw variant. */
public static final TokenSet FSTRING_TEXT_TOKENS = TokenSet.create(FSTRING_TEXT, FSTRING_RAW_TEXT);
}

View File

@@ -1271,7 +1271,7 @@ return yylength()-s.length();
// fall through
case 160: break;
case 40:
{ return PyTokenTypes.FSTRING_TEXT;
{ return fStringHelper.getTextTokenType();
}
// fall through
case 161: break;

View File

@@ -1,10 +1,15 @@
// Copyright 2000-2018 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.jetbrains.python.lexer
import com.intellij.psi.tree.IElementType
import com.intellij.util.text.CharArrayUtil
import com.jetbrains.python.PyTokenTypes
class PyFStringLiteralLexer: PyStringLiteralLexerBase(PyTokenTypes.FSTRING_TEXT) {
class PyFStringLiteralLexer(fStringTextToken: IElementType) : PyStringLiteralLexerBase(fStringTextToken) {
init {
assert(PyTokenTypes.FSTRING_TEXT_TOKENS.contains(fStringTextToken))
}
override fun locateToken(start: Int): Int {
if (start >= myBufferEnd) {
return myBufferEnd
@@ -19,8 +24,7 @@ class PyFStringLiteralLexer: PyStringLiteralLexerBase(PyTokenTypes.FSTRING_TEXT)
}
}
// TODO actually keep track of "raw" prefixes of f-strings somehow
override fun isRaw(): Boolean = false
override fun isRaw(): Boolean = myOriginalLiteralToken == PyTokenTypes.FSTRING_RAW_TEXT
override fun isUnicodeMode(): Boolean = true

View File

@@ -25,15 +25,18 @@ class PyLexerFStringHelper(private val myLexer: FlexLexerEx) {
}
private fun pushFString(prefixAndQuotes: String): PyElementType {
val openingQuotes = prefixAndQuotes.substring(PyStringLiteralUtil.getPrefixLength(prefixAndQuotes))
myFStringStates.push(FStringState(myLexer.yystate(), myLexer.tokenStart, openingQuotes))
val prefixLength = PyStringLiteralUtil.getPrefixLength(prefixAndQuotes)
val openingQuotes = prefixAndQuotes.substring(prefixLength)
val prefix = prefixAndQuotes.substring(0, prefixLength)
myFStringStates.push(FStringState(myLexer.yystate(), myLexer.tokenStart, prefix, openingQuotes))
myLexer.yybegin(_PythonLexer.FSTRING)
return PyTokenTypes.FSTRING_START
}
fun handleFStringEnd(): IElementType {
val textType = getTextTokenType()
val (type, offset) = findFStringTerminator(myLexer.yytext().toString())
return if (offset == 0) type!! else PyTokenTypes.FSTRING_TEXT
return if (offset == 0) type!! else textType
}
fun handleFragmentStart(): IElementType {
@@ -96,7 +99,7 @@ class PyLexerFStringHelper(private val myLexer: FlexLexerEx) {
val text = myLexer.yytext().toString()
val (_, offset) = findFStringTerminator(text)
if (offset == text.length) {
return PyTokenTypes.FSTRING_TEXT
return getTextTokenType()
}
return PyTokenTypes.LINE_BREAK
}
@@ -169,7 +172,18 @@ class PyLexerFStringHelper(private val myLexer: FlexLexerEx) {
myFStringStates.clear()
}
private data class FStringState(val oldState: Int, val offset: Int, val openingQuotes: String) {
/**
 * Picks the token type to report for literal text of the f-string on top of the state stack.
 *
 * @return [PyTokenTypes.FSTRING_RAW_TEXT] when [PyStringLiteralUtil.isRawPrefix] accepts the prefix
 * recorded for that f-string, [PyTokenTypes.FSTRING_TEXT] otherwise.
 */
fun getTextTokenType(): IElementType {
  // Only meaningful while at least one f-string is being lexed.
  assert(myFStringStates.isNotEmpty())
  val innermostPrefix = myFStringStates.peek().prefix
  return if (PyStringLiteralUtil.isRawPrefix(innermostPrefix)) PyTokenTypes.FSTRING_RAW_TEXT else PyTokenTypes.FSTRING_TEXT
}
/**
 * Bookkeeping for one f-string literal that is currently being lexed.
 *
 * @property oldState lexer state (`yystate()`) captured when the f-string was pushed
 * @property offset token start offset of the lexer at the moment the f-string was pushed
 * @property prefix the leading part of the opening token, up to the prefix length reported by
 * `PyStringLiteralUtil.getPrefixLength`; consulted to tell raw f-strings from plain ones
 * @property openingQuotes the remainder of the opening token after [prefix]
 */
private data class FStringState(val oldState: Int,
val offset: Int,
val prefix: String,
val openingQuotes: String) {
// Stack of per-fragment states; presumably one entry per currently open `{...}` fragment — TODO confirm.
val fragmentStates = Stack<FragmentState>()
}

View File

@@ -101,8 +101,8 @@ return yylength()-s.length();
%%
<FSTRING> {
{FSTRING_TEXT_NO_QUOTES} { return PyTokenTypes.FSTRING_TEXT; }
"\\" { return PyTokenTypes.FSTRING_TEXT; }
{FSTRING_TEXT_NO_QUOTES} { return fStringHelper.getTextTokenType(); }
"\\" { return fStringHelper.getTextTokenType(); }
[\n] { return fStringHelper.handleLineBreakInLiteralText(); }
{FSTRING_QUOTES} { return fStringHelper.handleFStringEnd(); }
"{" { return fStringHelper.handleFragmentStart(); }
@@ -136,8 +136,8 @@ return yylength()-s.length();
}
<FSTRING_FRAGMENT_FORMAT> {
{FSTRING_FORMAT_TEXT_NO_QUOTES} { return PyTokenTypes.FSTRING_TEXT; }
"\\" { return PyTokenTypes.FSTRING_TEXT; }
{FSTRING_FORMAT_TEXT_NO_QUOTES} { return fStringHelper.getTextTokenType(); }
"\\" { return fStringHelper.getTextTokenType(); }
[\n] { return fStringHelper.handleLineBreakInLiteralText(); }
{FSTRING_QUOTES} { return fStringHelper.handleFStringEnd(); }
"{" { return fStringHelper.handleFragmentStart(); }

View File

@@ -127,7 +127,7 @@ public class ExpressionParsing extends Parsing {
final PsiBuilder.Marker marker = builder.mark();
nextToken();
while (true) {
if (atToken(PyTokenTypes.FSTRING_TEXT)) {
if (atAnyOfTokens(PyTokenTypes.FSTRING_TEXT_TOKENS)) {
nextToken();
}
else if (atToken(PyTokenTypes.FSTRING_FRAGMENT_START)) {
@@ -210,7 +210,7 @@ public class ExpressionParsing extends Parsing {
final PsiBuilder.Marker marker = myContext.getBuilder().mark();
nextToken();
while (true) {
if (atToken(PyTokenTypes.FSTRING_TEXT)) {
if (atAnyOfTokens(PyTokenTypes.FSTRING_TEXT_TOKENS)) {
nextToken();
}
else if (atToken(PyTokenTypes.FSTRING_FRAGMENT_START)) {

View File

@@ -18,6 +18,7 @@ package com.jetbrains.python.parsing;
import com.intellij.lang.PsiBuilder;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import com.jetbrains.python.PyPsiBundle;
import com.jetbrains.python.PyElementTypes;
import com.jetbrains.python.PyTokenTypes;
@@ -98,6 +99,10 @@ public class Parsing {
return false;
}
/**
 * Checks whether the builder's current token type belongs to the given set.
 * The lexer is not advanced.
 *
 * @param tokenTypes set of acceptable token types
 * @return {@code true} if {@code tokenTypes} contains the current token type
 */
protected boolean atAnyOfTokens(@NotNull TokenSet tokenTypes) {
return tokenTypes.contains(myBuilder.getTokenType());
}
protected boolean matchToken(final IElementType tokenType) {
if (myBuilder.getTokenType() == tokenType) {
myBuilder.advanceLexer();

View File

@@ -43,7 +43,7 @@ public class PyFormattedStringElementImpl extends PyElementImpl implements PyFor
final TextRange contentRange = getContentRange();
return SyntaxTraverser.psiApi()
.children(this)
.filter(child -> child.getNode().getElementType() == PyTokenTypes.FSTRING_TEXT)
.filter(child -> PyTokenTypes.FSTRING_TEXT_TOKENS.contains(child.getNode().getElementType()))
.map(PsiElement::getTextRangeInParent)
.map(range -> range.intersection(contentRange))
.toList();
@@ -103,7 +103,7 @@ public class PyFormattedStringElementImpl extends PyElementImpl implements PyFor
result.add(Pair.create(relChildRange, child.getText()));
}
}
else if (childType == PyTokenTypes.FSTRING_TEXT) {
else if (PyTokenTypes.FSTRING_TEXT_TOKENS.contains(childType)) {
if (continuousTextStart == -1) {
continuousTextStart = relChildRange.getStartOffset();
}

View File

@@ -49,9 +49,13 @@ public class PyHighlighter extends SyntaxHighlighterBase {
PyTokenTypes.TRIPLE_QUOTED_UNICODE
);
ret.registerLayer(
new PyFStringLiteralLexer(),
new PyFStringLiteralLexer(PyTokenTypes.FSTRING_TEXT),
PyTokenTypes.FSTRING_TEXT
);
ret.registerLayer(
new PyFStringLiteralLexer(PyTokenTypes.FSTRING_RAW_TEXT),
PyTokenTypes.FSTRING_RAW_TEXT
);
return ret;
}
@@ -136,7 +140,8 @@ public class PyHighlighter extends SyntaxHighlighterBase {
keys.put(PyTokenTypes.FSTRING_START, PY_UNICODE_STRING);
keys.put(PyTokenTypes.FSTRING_END, PY_UNICODE_STRING);
keys.put(PyTokenTypes.FSTRING_TEXT, PY_UNICODE_STRING);
keys.put(PyTokenTypes.FSTRING_RAW_TEXT, PY_UNICODE_STRING);
keys.put(PyTokenTypes.FSTRING_FRAGMENT_TYPE_CONVERSION, PY_FSTRING_FRAGMENT_TYPE_CONVERSION);
keys.put(PyTokenTypes.FSTRING_FRAGMENT_FORMAT_START, PY_FSTRING_FRAGMENT_COLON);
keys.put(PyTokenTypes.FSTRING_FRAGMENT_START, PY_FSTRING_FRAGMENT_BRACES);

View File

@@ -0,0 +1 @@
s = f'{x:\n}'

View File

@@ -0,0 +1,19 @@
PyFile:FStringEscapeInFormatPartOfPlainLiteral.py
PyAssignmentStatement
PyTargetExpression: s
PsiElement(Py:IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(Py:EQ)('=')
PsiWhiteSpace(' ')
PyStringLiteralExpression: {x:\n}
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('f'')
PyFStringFragment
PsiElement(Py:FSTRING_FRAGMENT_START)('{')
PyReferenceExpression: x
PsiElement(Py:IDENTIFIER)('x')
PyFStringFragmentFormatPart
PsiElement(Py:FSTRING_FRAGMENT_FORMAT_START)(':')
PsiElement(Py:FSTRING_TEXT)('\n')
PsiElement(Py:FSTRING_FRAGMENT_END)('}')
PsiElement(Py:FSTRING_END)(''')

View File

@@ -0,0 +1 @@
s = fr'{x:\n}'

View File

@@ -0,0 +1,19 @@
PyFile:FStringEscapeInFormatPartOfRawLiteral.py
PyAssignmentStatement
PyTargetExpression: s
PsiElement(Py:IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(Py:EQ)('=')
PsiWhiteSpace(' ')
PyStringLiteralExpression: {x:\n}
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('fr'')
PyFStringFragment
PsiElement(Py:FSTRING_FRAGMENT_START)('{')
PyReferenceExpression: x
PsiElement(Py:IDENTIFIER)('x')
PyFStringFragmentFormatPart
PsiElement(Py:FSTRING_FRAGMENT_FORMAT_START)(':')
PsiElement(Py:FSTRING_RAW_TEXT)('\n')
PsiElement(Py:FSTRING_FRAGMENT_END)('}')
PsiElement(Py:FSTRING_END)(''')

View File

@@ -0,0 +1 @@
s = rf'foo{f"\n"}bar\n'

View File

@@ -0,0 +1,22 @@
PyFile:FStringPlainInsideRawFString.py
PyAssignmentStatement
PyTargetExpression: s
PsiElement(Py:IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(Py:EQ)('=')
PsiWhiteSpace(' ')
PyStringLiteralExpression: foo{f"\n"}bar\n
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('rf'')
PsiElement(Py:FSTRING_RAW_TEXT)('foo')
PyFStringFragment
PsiElement(Py:FSTRING_FRAGMENT_START)('{')
PyStringLiteralExpression:
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('f"')
PsiElement(Py:FSTRING_TEXT)('\n')
PsiElement(Py:FSTRING_END)('"')
PsiElement(Py:FSTRING_FRAGMENT_END)('}')
PsiElement(Py:FSTRING_RAW_TEXT)('bar\n')
PsiElement(Py:FSTRING_END)(''')

View File

@@ -0,0 +1 @@
s = f'foo{rf"\n"}bar\n'

View File

@@ -0,0 +1,22 @@
PyFile:FStringRawFStringInsidePlainFString.py
PyAssignmentStatement
PyTargetExpression: s
PsiElement(Py:IDENTIFIER)('s')
PsiWhiteSpace(' ')
PsiElement(Py:EQ)('=')
PsiWhiteSpace(' ')
PyStringLiteralExpression: foo{rf"\n"}bar
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('f'')
PsiElement(Py:FSTRING_TEXT)('foo')
PyFStringFragment
PsiElement(Py:FSTRING_FRAGMENT_START)('{')
PyStringLiteralExpression: \n
PyFormattedStringElement
PsiElement(Py:FSTRING_START)('rf"')
PsiElement(Py:FSTRING_RAW_TEXT)('\n')
PsiElement(Py:FSTRING_END)('"')
PsiElement(Py:FSTRING_FRAGMENT_END)('}')
PsiElement(Py:FSTRING_TEXT)('bar\n')
PsiElement(Py:FSTRING_END)(''')

View File

@@ -207,40 +207,64 @@ public class PythonHighlightingLexerTest extends PyLexerTestCase {
// PY-31758
public void testFStringEscapeSequences() {
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\nbar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\nbar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\\nbar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\\nbar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\u0041bar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\u0041bar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\x41bar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\x41bar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\101bar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\101bar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\N{GREEK SMALL LETTER ALPHA}bar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\N{GREEK SMALL LETTER ALPHA}bar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_CHARACTER_ESCAPE_TOKEN");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\u00'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\u00'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_UNICODE_ESCAPE_TOKEN", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\uZZZZbar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\uZZZZbar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_UNICODE_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\x0'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\x0'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_UNICODE_ESCAPE_TOKEN", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\xZZbar'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\xZZbar'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_UNICODE_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\10'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\777'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_TEXT", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\N{GREEK SMALL LETTER ALPHA'",
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'foo\\N{GREEK SMALL LETTER ALPHA'",
"Py:FSTRING_START", "Py:FSTRING_TEXT", "INVALID_UNICODE_ESCAPE_TOKEN", "Py:FSTRING_END");
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'{x:\\n}'",
"Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START", "Py:IDENTIFIER", "Py:FSTRING_FRAGMENT_FORMAT_START",
"VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_FRAGMENT_END", "Py:FSTRING_END");
}
// PY-32123
public void testRawFStringEscapeSequences() {
// Backslash followed by 'n' in a raw f-string: every piece stays FSTRING_RAW_TEXT, no escape token is expected.
doTestStringHighlighting(LanguageLevel.PYTHON36, "rf'foo\\nbar'",
"Py:FSTRING_START", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_END");
// Backslash followed by a real line break in a raw f-string: again only raw text tokens.
doTestStringHighlighting(LanguageLevel.PYTHON36, "rf'foo\\\nbar'",
"Py:FSTRING_START", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_END");
// Unterminated raw f-string ending in a lone backslash: the backslash is raw text, not an invalid escape.
doTestStringHighlighting(LanguageLevel.PYTHON36, "rf'foo\\",
"Py:FSTRING_START", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT");
// Backslash sequence inside the format part of a fragment of a raw f-string.
doTestStringHighlighting(LanguageLevel.PYTHON36, "rf'{x:\\n}'",
"Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START", "Py:IDENTIFIER", "Py:FSTRING_FRAGMENT_FORMAT_START",
"Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_FRAGMENT_END", "Py:FSTRING_END");
// Plain f-string nested in a raw one: the inner literal still yields an escape token.
doTestStringHighlighting(LanguageLevel.PYTHON36, "rf'{f\"\\n\"}'",
"Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START",
"Py:FSTRING_START", "VALID_STRING_ESCAPE_TOKEN", "Py:FSTRING_END",
"Py:FSTRING_FRAGMENT_END", "Py:FSTRING_END");
// Raw f-string nested in a plain one: the inner literal yields raw text only.
doTestStringHighlighting(LanguageLevel.PYTHON36, "f'{rf\"\\n\"}'",
"Py:FSTRING_START", "Py:FSTRING_FRAGMENT_START",
"Py:FSTRING_START", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_RAW_TEXT", "Py:FSTRING_END",
"Py:FSTRING_FRAGMENT_END", "Py:FSTRING_END");
}
private static void doTest(LanguageLevel languageLevel, String text, String... expectedTokens) {

View File

@@ -848,6 +848,26 @@ public class PythonParsingTest extends ParsingTestCase {
doTest(LanguageLevel.PYTHON36);
}
// PY-32123
public void testFStringRawFStringInsidePlainFString() {
// Runs at the Python 3.6 language level; presumably picks up the fixture named after this test — confirm in doTest.
doTest(LanguageLevel.PYTHON36);
}
// PY-32123
public void testFStringPlainInsideRawFString() {
// Runs at the Python 3.6 language level; presumably picks up the fixture named after this test — confirm in doTest.
doTest(LanguageLevel.PYTHON36);
}
// PY-32123
public void testFStringEscapeInFormatPartOfRawLiteral() {
// Runs at the Python 3.6 language level; presumably picks up the fixture named after this test — confirm in doTest.
doTest(LanguageLevel.PYTHON36);
}
// PY-32123
public void testFStringEscapeInFormatPartOfPlainLiteral() {
// Runs at the Python 3.6 language level; presumably picks up the fixture named after this test — confirm in doTest.
doTest(LanguageLevel.PYTHON36);
}
// PY-19036
public void testAwaitInNonAsyncNestedFunction() {
doTest(LanguageLevel.PYTHON35);