PY-72185 Implement incremental parsing for PSI leaves for Python

Merge-request: IJ-MR-138193 Merged-by: Daniil Kalinin <Daniil.Kalinin@jetbrains.com> GitOrigin-RevId: 7d004677a5b7a4637ea956f13ed85abc0d88604e
2026-04-18 20:41:22 +07:00 · 2024-07-17 12:15:36 +00:00
parent 4f263103af
commit e9a6246df3
118 changed files with 967 additions and 23 deletions
--- a/python/python-parser/intellij.python.parser.iml
+++ b/python/python-parser/intellij.python.parser.iml
@@ -13,5 +13,6 @@
    <orderEntry type="module" module-name="intellij.platform.core" />
    <orderEntry type="module" module-name="intellij.platform.util" />
    <orderEntry type="library" name="fastutil-min" level="project" />
+    <orderEntry type="module" module-name="intellij.platform.core.impl" />
  </component>
 </module>
--- a/python/python-parser/src/com/jetbrains/python/PyElementTypes.java
+++ b/python/python-parser/src/com/jetbrains/python/PyElementTypes.java
@@ -99,16 +99,6 @@ public interface PyElementTypes {
  PyElementType DICT_COMP_EXPRESSION = new PyElementType("DICT_COMP_EXPRESSION", Companion.getINSTANCE().getDictCompExpressionConstructor());
  TokenSet STATEMENT_LISTS = TokenSet.create(STATEMENT_LIST);

-  TokenSet BINARY_OPS = TokenSet.create(PyTokenTypes.OR_KEYWORD, PyTokenTypes.AND_KEYWORD, PyTokenTypes.LT, PyTokenTypes.GT,
-                                        PyTokenTypes.OR, PyTokenTypes.XOR, PyTokenTypes.AND, PyTokenTypes.LTLT, PyTokenTypes.GTGT,
-                                        PyTokenTypes.EQEQ, PyTokenTypes.GE, PyTokenTypes.LE, PyTokenTypes.NE, PyTokenTypes.NE_OLD,
-                                        PyTokenTypes.IN_KEYWORD, PyTokenTypes.IS_KEYWORD, PyTokenTypes.NOT_KEYWORD, PyTokenTypes.PLUS,
-                                        PyTokenTypes.MINUS, PyTokenTypes.MULT, PyTokenTypes.AT, PyTokenTypes.FLOORDIV, PyTokenTypes.DIV,
-                                        PyTokenTypes.PERC, PyTokenTypes.EXP);
-
-  TokenSet UNARY_OPS = TokenSet.create(PyTokenTypes.NOT_KEYWORD, PyTokenTypes.PLUS, PyTokenTypes.MINUS, PyTokenTypes.TILDE,
-                                       PyTokenTypes.AWAIT_KEYWORD);
-
  // Parts
  PyElementType IF_PART_IF = new PyElementType("IF_IF", Companion.getINSTANCE().getIfPartIfConstructor());
  PyElementType IF_PART_ELIF = new PyElementType("IF_ELIF", Companion.getINSTANCE().getIfPartElifConstructor());
--- a/python/python-parser/src/com/jetbrains/python/PyTokenTypes.java
+++ b/python/python-parser/src/com/jetbrains/python/PyTokenTypes.java
@@ -4,13 +4,13 @@ package com.jetbrains.python;
 import com.intellij.psi.TokenType;
 import com.intellij.psi.tree.IElementType;
 import com.intellij.psi.tree.TokenSet;
-import com.jetbrains.python.psi.PyElementType;
+import com.jetbrains.python.psi.*;

 public final class PyTokenTypes {
  private PyTokenTypes() {
  }

-  public static final PyElementType IDENTIFIER = new PyElementType("IDENTIFIER");
+  public static final PyElementType IDENTIFIER = new PyReparseableIdentifier("IDENTIFIER");
  public static final PyElementType LINE_BREAK = new PyElementType("LINE_BREAK");
  public static final PyElementType STATEMENT_BREAK = new PyElementType("STATEMENT_BREAK");
  public static final PyElementType SPACE = new PyElementType("SPACE");
@@ -19,7 +19,7 @@ public final class PyTokenTypes {
  public static final IElementType BAD_CHARACTER = TokenType.BAD_CHARACTER;
  public static final PyElementType INCONSISTENT_DEDENT = new PyElementType("INCONSISTENT_DEDENT");

-  public static final PyElementType END_OF_LINE_COMMENT = new PyElementType("END_OF_LINE_COMMENT");
+  public static final PyElementType END_OF_LINE_COMMENT = new PyReparseableEndOfLineCommentType("END_OF_LINE_COMMENT");

  public static final PyElementType AND_KEYWORD = new PyElementType("AND_KEYWORD");
  public static final PyElementType AS_KEYWORD = new PyElementType("AS_KEYWORD");
@@ -69,12 +69,12 @@ public final class PyTokenTypes {
  public static final PyElementType FLOAT_LITERAL = new PyElementType("FLOAT_LITERAL");
  public static final PyElementType IMAGINARY_LITERAL = new PyElementType("IMAGINARY_LITERAL");

-  public static final PyElementType SINGLE_QUOTED_STRING = new PyElementType("SINGLE_QUOTED_STRING");
-  public static final PyElementType TRIPLE_QUOTED_STRING = new PyElementType("TRIPLE_QUOTED_STRING");
+  public static final PyElementType SINGLE_QUOTED_STRING = new PyReparseableSingleQuotedStringTokenType("SINGLE_QUOTED_STRING");
+  public static final PyElementType TRIPLE_QUOTED_STRING = new PyReparseableTripleQuotedStringTokenType("TRIPLE_QUOTED_STRING");
  public static final PyElementType SINGLE_QUOTED_UNICODE = new PyElementType("SINGLE_QUOTED_UNICODE");
  public static final PyElementType TRIPLE_QUOTED_UNICODE = new PyElementType("TRIPLE_QUOTED_UNICODE");

-  public static final PyElementType DOCSTRING = new PyElementType("DOCSTRING");
+  public static final PyElementType DOCSTRING = new PyReparseableTripleQuotedStringTokenType("DOCSTRING");

  public static final TokenSet UNICODE_NODES = TokenSet.create(TRIPLE_QUOTED_UNICODE, SINGLE_QUOTED_UNICODE);
  public static final TokenSet TRIPLE_NODES = TokenSet.create(TRIPLE_QUOTED_UNICODE, TRIPLE_QUOTED_STRING);
@@ -142,6 +142,15 @@ public final class PyTokenTypes {
  public static final TokenSet COMPARISON_OPERATIONS = TokenSet.create(
      LT, GT, EQEQ, GE, LE, NE, NE_OLD, IN_KEYWORD, IS_KEYWORD, NOT_KEYWORD);

+  public static final TokenSet UNARY_OPS = TokenSet.create(NOT_KEYWORD, PLUS, MINUS, TILDE, AWAIT_KEYWORD);
+
+  public static final TokenSet BINARY_OPS = TokenSet.create(OR_KEYWORD, AND_KEYWORD, LT, GT,
+                                                            OR, XOR, AND, LTLT, GTGT,
+                                                            EQEQ, GE, LE, NE, NE_OLD,
+                                                            IN_KEYWORD, IS_KEYWORD, NOT_KEYWORD, PLUS,
+                                                            MINUS, MULT, AT, FLOORDIV, DIV,
+                                                            PERC, EXP);
+
  public static final TokenSet SHIFT_OPERATIONS = TokenSet.create(LTLT, GTGT);
  public static final TokenSet ADDITIVE_OPERATIONS = TokenSet.create(PLUS, MINUS);
  public static final TokenSet MULTIPLICATIVE_OPERATIONS = TokenSet.create(MULT, AT, FLOORDIV, DIV, PERC);
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableEndOfLineCommentType.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableEndOfLineCommentType.java
@@ -0,0 +1,15 @@
+package com.jetbrains.python.psi;
+
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Token type for Python end-of-line comments that supports leaf-level incremental reparse
+ * through the simple text check in {@link #isReparseable(String)}.
+ */
+public class PyReparseableEndOfLineCommentType extends PyReparseableTokenTypeWithSimpleCheck {
+
+  /**
+   * @param debugName name passed through to the superclass constructor
+   */
+  public PyReparseableEndOfLineCommentType(@NotNull String debugName) {
+    super(debugName);
+  }
+
+  /**
+   * Checks whether the edited text can still be a single end-of-line comment token.
+   *
+   * @param newText complete text of the leaf after the edit
+   * @return {@code true} if the text starts with {@code #} and contains no line break
+   */
+  @Override
+  public boolean isReparseable(@NotNull String newText) {
+    // An end-of-line comment stops at the line break: if the edit introduced one, everything after it
+    // is ordinary code and the leaf can no longer be replaced in place.
+    return newText.startsWith("#") && newText.indexOf('\n') < 0 && newText.indexOf('\r') < 0;
+  }
+}
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableIdentifier.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableIdentifier.java
@@ -0,0 +1,99 @@
+package com.jetbrains.python.psi;
+
+import com.intellij.lang.ASTFactory;
+import com.intellij.lang.ASTNode;
+import com.intellij.openapi.diagnostic.Logger;
+import com.intellij.openapi.util.registry.Registry;
+import com.intellij.openapi.util.text.StringUtil;
+import com.intellij.psi.PsiWhiteSpace;
+import com.intellij.psi.impl.source.tree.TreeUtil;
+import com.jetbrains.python.lexer.PythonLexer;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Identifier token type that supports leaf-level incremental reparse.
+ * <p>
+ * The edited identifier is accepted only if re-lexing the text of its parent node (with the edit applied)
+ * produces the same token types and token boundaries as the existing leaves, up to and including the edited leaf.
+ */
+public class PyReparseableIdentifier extends PyReparseableTokenType {
+
+  @SuppressWarnings("LoggerInitializedWithForeignClass")
+  private static final Logger LOG = Logger.getInstance(PyReparseableTokenType.class);
+
+  /**
+   * @param debugName name passed through to the superclass constructor
+   */
+  public PyReparseableIdentifier(@NotNull String debugName) {
+    super(debugName);
+  }
+
+  /**
+   * Tries to build a replacement leaf for an edited identifier.
+   *
+   * @param leaf    the existing identifier leaf
+   * @param newText complete text of the leaf after the edit
+   * @return a new leaf of this type holding {@code newText}, or {@code null} if the registry flag
+   * {@code python.ast.leaves.incremental.reparse} is off, the new text is empty, the leaf has no parent,
+   * or re-lexing disagrees with the existing leaves
+   */
+  @Override
+  public @Nullable ASTNode reparseLeaf(@NotNull ASTNode leaf, @NotNull CharSequence newText) {
+    if (!Registry.is("python.ast.leaves.incremental.reparse")) {
+      return null;
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Attempting to reparse leaf element of type " + this
+                + "\nold text: \n" + leaf.getText()
+                + "\n\nnew text: \n" + newText);
+    }
+
+    if (newText.isEmpty()) {
+      LOG.debug("New text is empty");
+      return null;
+    }
+
+    var lexingContainer = leaf.getTreeParent();
+    if (lexingContainer == null) {
+      LOG.debug("No re-lexing container for a leaf");
+      return null;
+    }
+
+    // Text of the parent node with the leaf's range replaced by the new text; offsets are relative to the parent.
+    var originalCharSequence = lexingContainer.getChars();
+    var originalLeafRangeInLexingContainer = leaf.getTextRange().shiftLeft(lexingContainer.getStartOffset());
+    var updatedCharSequence = StringUtil.replaceSubSequence(
+      originalCharSequence, originalLeafRangeInLexingContainer.getStartOffset(), originalLeafRangeInLexingContainer.getEndOffset(),
+      newText);
+
+
+    // Walk the existing leaves and the freshly lexed tokens side by side, starting from the parent's first leaf.
+    var currentLeaf = TreeUtil.findFirstLeaf(lexingContainer);
+    PythonLexer lexer = new PythonLexer();
+
+    lexer.start(updatedCharSequence);
+
+    while (true) {
+      if (currentLeaf == null) {
+        LOG.debug("We are out of original leaves");
+        return null;
+      }
+
+      var tokenType = lexer.getTokenType();
+
+      // Whitespace leaves are skipped without comparing type or offsets.
+      // NOTE(review): this advances the lexer by exactly one token per whitespace leaf — confirm the lexer
+      // never splits a whitespace leaf into several tokens.
+      if (currentLeaf instanceof PsiWhiteSpace) {
+        currentLeaf = TreeUtil.nextLeaf(currentLeaf);
+        lexer.advance();
+        continue;
+      }
+
+      if (tokenType != currentLeaf.getElementType()) {
+        LOG.debug("Wrong token type lexed: ", tokenType, " instead of ", currentLeaf.getElementType());
+        return null;
+      }
+
+      var currentLeafRangeInLexingContainer = currentLeaf.getTextRange().shiftLeft(lexingContainer.getStartOffset());
+      if (currentLeaf == leaf) {
+        // The edited leaf: the lexed token must end exactly where the new text ends.
+        // Leaves after the edited one are not examined.
+        var expectedEndOffset = currentLeafRangeInLexingContainer.getStartOffset() + newText.length();
+        if (lexer.getTokenEnd() != expectedEndOffset) {
+          LOG.debug("Wrong end offset, got ", lexer.getTokenEnd(), " instead of ", expectedEndOffset);
+          return null;
+        }
+        break;
+      }
+      else if (currentLeafRangeInLexingContainer.getEndOffset() != lexer.getTokenEnd()) {
+        // A leaf before the edited one: its end offset must match the lexed token's end.
+        LOG.debug("Wrong token end offset for: ", tokenType,
+                  "; got ", lexer.getTokenEnd(),
+                  " instead of ", currentLeafRangeInLexingContainer.getEndOffset());
+        return null;
+      }
+
+      currentLeaf = TreeUtil.nextLeaf(currentLeaf);
+      lexer.advance();
+    }
+
+    LOG.debug("Reparse is successful");
+    return ASTFactory.leaf(this, newText);
+  }
+}
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableSingleQuotedStringTokenType.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableSingleQuotedStringTokenType.java
@@ -0,0 +1,36 @@
+package com.jetbrains.python.psi;
+
+import com.intellij.psi.tree.IElementType;
+import com.jetbrains.python.PyTokenTypes;
+import com.jetbrains.python.lexer.PythonLexer;
+import org.jetbrains.annotations.NotNull;
+
+public class PyReparseableSingleQuotedStringTokenType extends PyReparseableTokenTypeWithSimpleCheck {
+
+  public PyReparseableSingleQuotedStringTokenType(@NotNull String debugName) {
+    super(debugName);
+  }
+
+  @Override
+  public boolean isReparseable(@NotNull String newText) {
+    if (!isSingleQuotedString(newText) || isChangedToTripleQuoted(newText)) { // fail-fast
+      return false;
+    }
+    PythonLexer lexer = new PythonLexer();
+    lexer.start(newText);
+    IElementType firstTokenType = lexer.getTokenType();
+    lexer.advance();
+    IElementType nextTokenType = lexer.getTokenType();
+    return firstTokenType == PyTokenTypes.DOCSTRING && nextTokenType == null;
+  }
+
+  private static boolean isSingleQuotedString(@NotNull String newText) {
+    return (newText.startsWith("\"") && newText.endsWith("\"")) ||
+           (newText.startsWith("'") && newText.endsWith("'"));
+  }
+
+  private static boolean isChangedToTripleQuoted(@NotNull String newText) {
+    return (newText.startsWith("\"\"\"") && newText.endsWith("\"\"\"")) ||
+           (newText.startsWith("'''") && newText.endsWith("'''"));
+  }
+}
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTokenType.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTokenType.java
@@ -0,0 +1,12 @@
+package com.jetbrains.python.psi;
+
+import com.intellij.lang.ASTNode;
+import com.intellij.psi.tree.IReparseableLeafElementType;
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * Base class for Python token types that implement {@link IReparseableLeafElementType}.
+ * It declares no methods of its own; {@code reparseLeaf} is left to concrete subclasses.
+ */
+public abstract class PyReparseableTokenType extends PyElementType implements IReparseableLeafElementType<ASTNode> {
+
+  /**
+   * @param debugName name passed through to the {@link PyElementType} constructor
+   */
+  public PyReparseableTokenType(@NotNull String debugName) {
+    super(debugName);
+  }
+}
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTokenTypeWithSimpleCheck.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTokenTypeWithSimpleCheck.java
@@ -0,0 +1,39 @@
+package com.jetbrains.python.psi;
+
+import com.intellij.lang.ASTFactory;
+import com.intellij.lang.ASTNode;
+import com.intellij.openapi.diagnostic.Logger;
+import com.intellij.openapi.util.registry.Registry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Base class for reparseable token types whose decision depends only on the new text of the leaf.
+ * Subclasses supply the check through {@link #isReparseable(String)}.
+ */
+public abstract class PyReparseableTokenTypeWithSimpleCheck extends PyReparseableTokenType {
+
+  @SuppressWarnings("LoggerInitializedWithForeignClass")
+  private static final Logger LOG = Logger.getInstance(PyReparseableTokenType.class);
+
+  /**
+   * @param debugName name passed through to the superclass constructor
+   */
+  public PyReparseableTokenTypeWithSimpleCheck(@NotNull String debugName) {
+    super(debugName);
+  }
+
+  /**
+   * Tries to build a replacement leaf for an edited token.
+   *
+   * @param leaf    the existing leaf (used here only for debug logging)
+   * @param newText complete text of the leaf after the edit
+   * @return a new leaf of this type holding {@code newText}, or {@code null} if the registry flag
+   * {@code python.ast.leaves.incremental.reparse} is off or {@link #isReparseable(String)} rejects the text
+   */
+  @Override
+  public @Nullable ASTNode reparseLeaf(@NotNull ASTNode leaf, @NotNull CharSequence newText) {
+    if (!Registry.is("python.ast.leaves.incremental.reparse")) {
+      return null;
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Attempting to reparse leaf element of type " + this
+                + "\nold text: \n" + leaf.getText()
+                + "\n\nnew text: \n" + newText);
+    }
+
+    if (isReparseable(newText.toString())) {
+      LOG.debug("Reparse is successful");
+      return ASTFactory.leaf(this, newText);
+    }
+    LOG.debug("Reparse is declined");
+    return null;
+  }
+
+  /**
+   * Decides whether the edited text may replace the leaf in place.
+   *
+   * @param newText complete text of the leaf after the edit
+   * @return {@code true} if a leaf of this type may be created from {@code newText}
+   */
+  public abstract boolean isReparseable(@NotNull String newText);
+}
--- a/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTripleQuotedStringTokenType.java
+++ b/python/python-parser/src/com/jetbrains/python/psi/PyReparseableTripleQuotedStringTokenType.java
@@ -0,0 +1,31 @@
+package com.jetbrains.python.psi;
+
+import com.intellij.psi.tree.IElementType;
+import com.jetbrains.python.PyTokenTypes;
+import com.jetbrains.python.lexer.PythonLexer;
+import org.jetbrains.annotations.NotNull;
+
+public class PyReparseableTripleQuotedStringTokenType extends PyReparseableTokenTypeWithSimpleCheck {
+
+  public PyReparseableTripleQuotedStringTokenType(@NotNull String debugName) {
+    super(debugName);
+  }
+
+  @Override
+  public boolean isReparseable(@NotNull String newText) {
+    if (!isTripleQuotedString(newText)) { // fail-fast
+      return false;
+    }
+    PythonLexer lexer = new PythonLexer();
+    lexer.start(newText);
+    IElementType firstTokenType = lexer.getTokenType();
+    lexer.advance();
+    IElementType nextTokenType = lexer.getTokenType();
+    return firstTokenType == PyTokenTypes.DOCSTRING && nextTokenType == null;
+  }
+
+  private static boolean isTripleQuotedString(@NotNull String newText) {
+    return (newText.startsWith("\"\"\"") && newText.endsWith("\"\"\"")) ||
+           (newText.startsWith("'''") && newText.endsWith("'''"));
+  }
+}