PY-72185 Implement incremental parsing for PSI leaves for Python

Merge-request: IJ-MR-138193
Merged-by: Daniil Kalinin <Daniil.Kalinin@jetbrains.com>

GitOrigin-RevId: 7d004677a5b7a4637ea956f13ed85abc0d88604e
This commit is contained in:
Daniil Kalinin
2024-07-17 12:15:36 +00:00
committed by intellij-monorepo-bot
parent 4f263103af
commit e9a6246df3
118 changed files with 967 additions and 23 deletions

View File

@@ -13,5 +13,6 @@
<orderEntry type="module" module-name="intellij.platform.core" />
<orderEntry type="module" module-name="intellij.platform.util" />
<orderEntry type="library" name="fastutil-min" level="project" />
<orderEntry type="module" module-name="intellij.platform.core.impl" />
</component>
</module>

View File

@@ -99,16 +99,6 @@ public interface PyElementTypes {
PyElementType DICT_COMP_EXPRESSION = new PyElementType("DICT_COMP_EXPRESSION", Companion.getINSTANCE().getDictCompExpressionConstructor());
TokenSet STATEMENT_LISTS = TokenSet.create(STATEMENT_LIST);
TokenSet BINARY_OPS = TokenSet.create(PyTokenTypes.OR_KEYWORD, PyTokenTypes.AND_KEYWORD, PyTokenTypes.LT, PyTokenTypes.GT,
PyTokenTypes.OR, PyTokenTypes.XOR, PyTokenTypes.AND, PyTokenTypes.LTLT, PyTokenTypes.GTGT,
PyTokenTypes.EQEQ, PyTokenTypes.GE, PyTokenTypes.LE, PyTokenTypes.NE, PyTokenTypes.NE_OLD,
PyTokenTypes.IN_KEYWORD, PyTokenTypes.IS_KEYWORD, PyTokenTypes.NOT_KEYWORD, PyTokenTypes.PLUS,
PyTokenTypes.MINUS, PyTokenTypes.MULT, PyTokenTypes.AT, PyTokenTypes.FLOORDIV, PyTokenTypes.DIV,
PyTokenTypes.PERC, PyTokenTypes.EXP);
TokenSet UNARY_OPS = TokenSet.create(PyTokenTypes.NOT_KEYWORD, PyTokenTypes.PLUS, PyTokenTypes.MINUS, PyTokenTypes.TILDE,
PyTokenTypes.AWAIT_KEYWORD);
// Parts
PyElementType IF_PART_IF = new PyElementType("IF_IF", Companion.getINSTANCE().getIfPartIfConstructor());
PyElementType IF_PART_ELIF = new PyElementType("IF_ELIF", Companion.getINSTANCE().getIfPartElifConstructor());

View File

@@ -4,13 +4,13 @@ package com.jetbrains.python;
import com.intellij.psi.TokenType;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.TokenSet;
import com.jetbrains.python.psi.PyElementType;
import com.jetbrains.python.psi.*;
public final class PyTokenTypes {
private PyTokenTypes() {
}
public static final PyElementType IDENTIFIER = new PyElementType("IDENTIFIER");
public static final PyElementType IDENTIFIER = new PyReparseableIdentifier("IDENTIFIER");
public static final PyElementType LINE_BREAK = new PyElementType("LINE_BREAK");
public static final PyElementType STATEMENT_BREAK = new PyElementType("STATEMENT_BREAK");
public static final PyElementType SPACE = new PyElementType("SPACE");
@@ -19,7 +19,7 @@ public final class PyTokenTypes {
public static final IElementType BAD_CHARACTER = TokenType.BAD_CHARACTER;
public static final PyElementType INCONSISTENT_DEDENT = new PyElementType("INCONSISTENT_DEDENT");
public static final PyElementType END_OF_LINE_COMMENT = new PyElementType("END_OF_LINE_COMMENT");
public static final PyElementType END_OF_LINE_COMMENT = new PyReparseableEndOfLineCommentType("END_OF_LINE_COMMENT");
public static final PyElementType AND_KEYWORD = new PyElementType("AND_KEYWORD");
public static final PyElementType AS_KEYWORD = new PyElementType("AS_KEYWORD");
@@ -69,12 +69,12 @@ public final class PyTokenTypes {
public static final PyElementType FLOAT_LITERAL = new PyElementType("FLOAT_LITERAL");
public static final PyElementType IMAGINARY_LITERAL = new PyElementType("IMAGINARY_LITERAL");
public static final PyElementType SINGLE_QUOTED_STRING = new PyElementType("SINGLE_QUOTED_STRING");
public static final PyElementType TRIPLE_QUOTED_STRING = new PyElementType("TRIPLE_QUOTED_STRING");
public static final PyElementType SINGLE_QUOTED_STRING = new PyReparseableSingleQuotedStringTokenType("SINGLE_QUOTED_STRING");
public static final PyElementType TRIPLE_QUOTED_STRING = new PyReparseableTripleQuotedStringTokenType("TRIPLE_QUOTED_STRING");
public static final PyElementType SINGLE_QUOTED_UNICODE = new PyElementType("SINGLE_QUOTED_UNICODE");
public static final PyElementType TRIPLE_QUOTED_UNICODE = new PyElementType("TRIPLE_QUOTED_UNICODE");
public static final PyElementType DOCSTRING = new PyElementType("DOCSTRING");
public static final PyElementType DOCSTRING = new PyReparseableTripleQuotedStringTokenType("DOCSTRING");
public static final TokenSet UNICODE_NODES = TokenSet.create(TRIPLE_QUOTED_UNICODE, SINGLE_QUOTED_UNICODE);
public static final TokenSet TRIPLE_NODES = TokenSet.create(TRIPLE_QUOTED_UNICODE, TRIPLE_QUOTED_STRING);
@@ -142,6 +142,15 @@ public final class PyTokenTypes {
public static final TokenSet COMPARISON_OPERATIONS = TokenSet.create(
LT, GT, EQEQ, GE, LE, NE, NE_OLD, IN_KEYWORD, IS_KEYWORD, NOT_KEYWORD);
public static final TokenSet UNARY_OPS = TokenSet.create(NOT_KEYWORD, PLUS, MINUS, TILDE, AWAIT_KEYWORD);
public static final TokenSet BINARY_OPS = TokenSet.create(OR_KEYWORD, AND_KEYWORD, LT, GT,
OR, XOR, AND, LTLT, GTGT,
EQEQ, GE, LE, NE, NE_OLD,
IN_KEYWORD, IS_KEYWORD, NOT_KEYWORD, PLUS,
MINUS, MULT, AT, FLOORDIV, DIV,
PERC, EXP);
public static final TokenSet SHIFT_OPERATIONS = TokenSet.create(LTLT, GTGT);
public static final TokenSet ADDITIVE_OPERATIONS = TokenSet.create(PLUS, MINUS);
public static final TokenSet MULTIPLICATIVE_OPERATIONS = TokenSet.create(MULT, AT, FLOORDIV, DIV, PERC);

View File

@@ -0,0 +1,15 @@
package com.jetbrains.python.psi;
import org.jetbrains.annotations.NotNull;
/**
 * Token type for end-of-line comments whose leaf can be replaced in place after an edit.
 * <p>
 * Leaf creation is inherited from {@link PyReparseableTokenTypeWithSimpleCheck}; this class only decides
 * whether the edited text is still acceptable as a single comment.
 */
public class PyReparseableEndOfLineCommentType extends PyReparseableTokenTypeWithSimpleCheck {

  /**
   * @param debugName name passed through to the parent element type
   */
  public PyReparseableEndOfLineCommentType(@NotNull String debugName) {
    super(debugName);
  }

  /**
   * Accepts the edited text only when it still starts with {@code #} and stays on one line.
   *
   * @param newText full text of the leaf after the edit
   * @return {@code true} if the text starts with {@code #} and contains no line break
   */
  @Override
  public boolean isReparseable(@NotNull String newText) {
    if (!newText.startsWith("#")) {
      return false;
    }
    // An end-of-line comment stops at the line break, so text spanning several lines
    // cannot be a single comment token; declining here falls back to a regular reparse.
    return newText.indexOf('\n') < 0 && newText.indexOf('\r') < 0;
  }
}

View File

@@ -0,0 +1,99 @@
package com.jetbrains.python.psi;
import com.intellij.lang.ASTFactory;
import com.intellij.lang.ASTNode;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.util.registry.Registry;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.psi.PsiWhiteSpace;
import com.intellij.psi.impl.source.tree.TreeUtil;
import com.jetbrains.python.lexer.PythonLexer;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Token type for identifiers that supports replacing a single identifier leaf after an edit.
 * <p>
 * The decision is made by re-lexing the text of the leaf's tree parent with the edited text substituted
 * and comparing the produced tokens with the existing leaves one by one.
 */
public class PyReparseableIdentifier extends PyReparseableTokenType {

  // Deliberately shares the logger category of the base class (hence the suppression).
  @SuppressWarnings("LoggerInitializedWithForeignClass")
  private static final Logger LOG = Logger.getInstance(PyReparseableTokenType.class);

  /**
   * @param debugName name passed through to the parent element type
   */
  public PyReparseableIdentifier(@NotNull String debugName) {
    super(debugName);
  }

  /**
   * Tries to build a replacement leaf for {@code leaf} carrying {@code newText}.
   *
   * @param leaf    the existing identifier leaf
   * @param newText the text of the leaf after the edit
   * @return a new leaf of this type, or {@code null} when the registry flag is off, the new text is empty,
   * the leaf has no parent, or the re-lexed tokens do not line up with the existing leaves
   */
  @Override
  public @Nullable ASTNode reparseLeaf(@NotNull ASTNode leaf, @NotNull CharSequence newText) {
    if (!Registry.is("python.ast.leaves.incremental.reparse")) {
      return null;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Attempting to reparse leaf element of type " + this
                + "\nold text: \n" + leaf.getText()
                + "\n\nnew text: \n" + newText);
    }

    if (newText.isEmpty()) {
      LOG.debug("New text is empty");
      return null;
    }

    var lexingContainer = leaf.getTreeParent();
    if (lexingContainer == null) {
      LOG.debug("No re-lexing container for a leaf");
      return null;
    }

    // Build the container text as it would look with the edited leaf in place.
    var originalCharSequence = lexingContainer.getChars();
    var originalLeafRangeInLexingContainer = leaf.getTextRange().shiftLeft(lexingContainer.getStartOffset());
    var updatedCharSequence = StringUtil.replaceSubSequence(
      originalCharSequence, originalLeafRangeInLexingContainer.getStartOffset(), originalLeafRangeInLexingContainer.getEndOffset(),
      newText);

    var currentLeaf = TreeUtil.findFirstLeaf(lexingContainer);
    PythonLexer lexer = new PythonLexer();
    lexer.start(updatedCharSequence);

    // Walk the existing leaves and the fresh tokens in lock-step until the edited leaf is reached.
    while (true) {
      if (currentLeaf == null) {
        LOG.debug("We are out of original leaves");
        return null;
      }

      var tokenType = lexer.getTokenType();

      // Whitespace leaves are skipped together with the current token, without comparing them.
      if (currentLeaf instanceof PsiWhiteSpace) {
        currentLeaf = TreeUtil.nextLeaf(currentLeaf);
        lexer.advance();
        continue;
      }

      if (tokenType != currentLeaf.getElementType()) {
        LOG.debug("Wrong token type lexed: ", tokenType, " instead of ", currentLeaf.getElementType());
        return null;
      }

      var currentLeafRangeInLexingContainer = currentLeaf.getTextRange().shiftLeft(lexingContainer.getStartOffset());
      if (currentLeaf == leaf) {
        // The edited leaf must be covered by exactly one token spanning the whole new text.
        var expectedEndOffset = currentLeafRangeInLexingContainer.getStartOffset() + newText.length();
        if (lexer.getTokenEnd() != expectedEndOffset) {
          LOG.debug("Wrong end offset, got ", lexer.getTokenEnd(), " instead of ", expectedEndOffset);
          return null;
        }
        break;
      }
      else if (currentLeafRangeInLexingContainer.getEndOffset() != lexer.getTokenEnd()) {
        LOG.debug("Wrong token end offset for: ", tokenType,
                  "; got ", lexer.getTokenEnd(),
                  " instead of ", currentLeafRangeInLexingContainer.getEndOffset());
        return null;
      }

      currentLeaf = TreeUtil.nextLeaf(currentLeaf);
      lexer.advance();
    }

    LOG.debug("Reparse is successful");
    return ASTFactory.leaf(this, newText);
  }
}

View File

@@ -0,0 +1,36 @@
package com.jetbrains.python.psi;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PyTokenTypes;
import com.jetbrains.python.lexer.PythonLexer;
import org.jetbrains.annotations.NotNull;
/**
 * Token type for single-quoted string literals whose leaf can be replaced in place after an edit.
 * <p>
 * Only unprefixed literals are accepted: the text must start with the quote character itself.
 */
public class PyReparseableSingleQuotedStringTokenType extends PyReparseableTokenTypeWithSimpleCheck {

  /**
   * @param debugName name passed through to the parent element type
   */
  public PyReparseableSingleQuotedStringTokenType(@NotNull String debugName) {
    super(debugName);
  }

  /**
   * Checks that the edited text is still one complete single-quoted literal.
   *
   * @param newText full text of the leaf after the edit
   * @return {@code true} if the text passes the cheap quote checks and the lexer produces exactly one token
   * of type {@link PyTokenTypes#DOCSTRING} for it
   */
  @Override
  public boolean isReparseable(@NotNull String newText) {
    if (!isSingleQuotedString(newText) || isChangedToTripleQuoted(newText) || isClosingQuoteEscaped(newText)) { // fail-fast
      return false;
    }

    PythonLexer lexer = new PythonLexer();
    lexer.start(newText);
    IElementType firstTokenType = lexer.getTokenType();
    lexer.advance();
    IElementType nextTokenType = lexer.getTokenType();

    // NOTE(review): DOCSTRING is presumably what the standalone lexer reports for a string literal that
    // spans its whole input - confirm against the lexer definition.
    return firstTokenType == PyTokenTypes.DOCSTRING && nextTokenType == null;
  }

  /**
   * Tells whether the text opens and closes with the same quote character.
   * At least two characters are required, because a lone quote would otherwise count as both
   * the opening and the closing quote.
   */
  private static boolean isSingleQuotedString(@NotNull String newText) {
    if (newText.length() < 2) {
      return false;
    }
    return (newText.startsWith("\"") && newText.endsWith("\"")) ||
           (newText.startsWith("'") && newText.endsWith("'"));
  }

  /**
   * Tells whether the text now opens with a triple-quote delimiter.
   * Only the opening is checked: three leading quotes start a triple-quoted literal in Python
   * whether or not a matching closing delimiter is present.
   */
  private static boolean isChangedToTripleQuoted(@NotNull String newText) {
    return newText.startsWith("\"\"\"") || newText.startsWith("'''");
  }

  /**
   * Tells whether the last character is preceded by an odd number of backslashes, i.e. the final quote
   * is escaped and does not terminate the literal. Expects the text to have already passed
   * {@link #isSingleQuotedString(String)}; the opening quote at index 0 is never inspected.
   */
  private static boolean isClosingQuoteEscaped(@NotNull String newText) {
    int backslashes = 0;
    for (int i = newText.length() - 2; i > 0 && newText.charAt(i) == '\\'; i--) {
      backslashes++;
    }
    return (backslashes & 1) == 1;
  }
}

View File

@@ -0,0 +1,12 @@
package com.jetbrains.python.psi;
import com.intellij.lang.ASTNode;
import com.intellij.psi.tree.IReparseableLeafElementType;
import org.jetbrains.annotations.NotNull;
/**
 * Common base for Python token types that implement {@link IReparseableLeafElementType}.
 * <p>
 * The class adds no behaviour of its own; subclasses supply the {@code reparseLeaf} implementation.
 */
public abstract class PyReparseableTokenType extends PyElementType implements IReparseableLeafElementType<ASTNode> {
/**
 * @param debugName name passed through to {@link PyElementType}
 */
public PyReparseableTokenType(@NotNull String debugName) {
super(debugName);
}
}

View File

@@ -0,0 +1,39 @@
package com.jetbrains.python.psi;
import com.intellij.lang.ASTFactory;
import com.intellij.lang.ASTNode;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.util.registry.Registry;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Base class for reparseable token types whose decision depends only on the edited text itself.
 * <p>
 * Subclasses implement {@link #isReparseable(String)}; this class handles the registry switch,
 * debug logging and creation of the replacement leaf.
 */
public abstract class PyReparseableTokenTypeWithSimpleCheck extends PyReparseableTokenType {

  // Deliberately shares the logger category of the base class (hence the suppression).
  @SuppressWarnings("LoggerInitializedWithForeignClass")
  private static final Logger LOG = Logger.getInstance(PyReparseableTokenType.class);

  /**
   * @param debugName name passed through to the parent element type
   */
  public PyReparseableTokenTypeWithSimpleCheck(@NotNull String debugName) {
    super(debugName);
  }

  /**
   * Builds a replacement leaf when the edited text is accepted by {@link #isReparseable(String)}.
   *
   * @param leaf    the existing leaf; used here only for the debug message
   * @param newText the text of the leaf after the edit
   * @return a new leaf of this type holding {@code newText}, or {@code null} when the registry flag is off
   * or the text is declined
   */
  @Override
  public @Nullable ASTNode reparseLeaf(@NotNull ASTNode leaf, @NotNull CharSequence newText) {
    if (!Registry.is("python.ast.leaves.incremental.reparse")) {
      return null;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Attempting to reparse leaf element of type " + this
                + "\nold text: \n" + leaf.getText()
                + "\n\nnew text: \n" + newText);
    }

    if (isReparseable(newText.toString())) {
      LOG.debug("Reparse is successful");
      return ASTFactory.leaf(this, newText);
    }

    LOG.debug("Reparse is declined");
    return null;
  }

  /**
   * Decides whether the edited text can still be represented by a single leaf of this type.
   *
   * @param newText full text of the leaf after the edit
   * @return {@code true} to accept the text, {@code false} to decline the leaf reparse
   */
  public abstract boolean isReparseable(@NotNull String newText);
}

View File

@@ -0,0 +1,31 @@
package com.jetbrains.python.psi;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PyTokenTypes;
import com.jetbrains.python.lexer.PythonLexer;
import org.jetbrains.annotations.NotNull;
/**
 * Token type for triple-quoted string literals whose leaf can be replaced in place after an edit.
 * <p>
 * Only unprefixed literals are accepted: the text must start with the triple-quote delimiter itself.
 */
public class PyReparseableTripleQuotedStringTokenType extends PyReparseableTokenTypeWithSimpleCheck {

  // Length of the opening (and of the closing) delimiter.
  private static final int DELIMITER_LENGTH = 3;

  /**
   * @param debugName name passed through to the parent element type
   */
  public PyReparseableTripleQuotedStringTokenType(@NotNull String debugName) {
    super(debugName);
  }

  /**
   * Checks that the edited text is still one complete triple-quoted literal.
   *
   * @param newText full text of the leaf after the edit
   * @return {@code true} if the text passes the cheap delimiter checks and the lexer produces exactly one
   * token of type {@link PyTokenTypes#DOCSTRING} for it
   */
  @Override
  public boolean isReparseable(@NotNull String newText) {
    if (!isTripleQuotedString(newText) || isClosingDelimiterEscaped(newText)) { // fail-fast
      return false;
    }

    PythonLexer lexer = new PythonLexer();
    lexer.start(newText);
    IElementType firstTokenType = lexer.getTokenType();
    lexer.advance();
    IElementType nextTokenType = lexer.getTokenType();

    // NOTE(review): DOCSTRING is presumably what the standalone lexer reports for a string literal that
    // spans its whole input - confirm against the lexer definition.
    return firstTokenType == PyTokenTypes.DOCSTRING && nextTokenType == null;
  }

  /**
   * Tells whether the text opens and closes with the same triple-quote delimiter.
   * At least six characters are required so that the opening and closing delimiters cannot overlap
   * (three to five quotes in a row would otherwise satisfy both checks).
   */
  private static boolean isTripleQuotedString(@NotNull String newText) {
    if (newText.length() < 2 * DELIMITER_LENGTH) {
      return false;
    }
    return (newText.startsWith("\"\"\"") && newText.endsWith("\"\"\"")) ||
           (newText.startsWith("'''") && newText.endsWith("'''"));
  }

  /**
   * Tells whether the trailing delimiter is preceded by an odd number of backslashes, i.e. its first
   * quote is escaped and the remaining two quotes do not terminate the literal. Expects the text to have
   * already passed {@link #isTripleQuotedString(String)}; the opening delimiter is never inspected.
   */
  private static boolean isClosingDelimiterEscaped(@NotNull String newText) {
    int backslashes = 0;
    for (int i = newText.length() - DELIMITER_LENGTH - 1; i >= DELIMITER_LENGTH && newText.charAt(i) == '\\'; i--) {
      backslashes++;
    }
    return (backslashes & 1) == 1;
  }
}