PY-64326 implement incremental parsing for PyStatementList PSI elements

These changes make PyStatementList elements (function and class bodies, loop bodies, if-else branches, etc.) lazy-parseable, which means they can now be reparsed without reparsing the whole file when the changes made inside them are accepted as safe

The main reason behind these changes is to improve performance

GitOrigin-RevId: 892acbe0c95fde6aec74b7595b0a58f902c426f5
This commit is contained in:
Daniil Kalinin
2024-05-07 13:07:09 +02:00
committed by intellij-monorepo-bot
parent eff6477fc2
commit ba6015d27f
13 changed files with 445 additions and 62 deletions

View File

@@ -465,6 +465,8 @@
description="Require marking namespace packages explicitly, treat regular directories as implicit source roots"/>
<registryKey key="python.type.hints.literal.string" defaultValue="true"
description="When enabled, activates LiteralString inference for Python string literals" />
<registryKey key="python.statement.lists.incremental.reparse" defaultValue="true"
description="Enables incremental reparse for statement lists"/>
</extensions>

View File

@@ -2,12 +2,10 @@
package com.jetbrains.python
import com.intellij.lang.ASTNode
import com.intellij.openapi.components.Service
import com.intellij.psi.PsiElement
import com.intellij.psi.stubs.IStubElementType
import com.intellij.psi.tree.IReparseableElementType
import com.jetbrains.python.psi.impl.*
import com.jetbrains.python.PyElementTypesFacade
import com.jetbrains.python.psi.PyElementType
import java.util.function.Function
@@ -54,6 +52,9 @@ class PyElementTypesFacadeImpl : PyElementTypesFacade() {
override val typeAliasStatement: IStubElementType<*, *>
get() = PyStubElementTypes.TYPE_ALIAS_STATEMENT
// The element type is created once, on first access, and then reused.
// A computed getter would build (and register with the platform) a fresh element type on
// every read, and element types are compared by identity, so two reads must return the
// same instance.
override val statementList: IReparseableElementType by lazy { PyStatementListElementType() }
override val argumentListConstructor: F
get() = F { node: ASTNode -> PyArgumentListImpl(node) }
override val printTargetConstructor: F
@@ -99,8 +100,6 @@ class PyElementTypesFacadeImpl : PyElementTypesFacade() {
get() = F { node -> PyWithStatementImpl(node) }
override val whileStatementConstructor: F
get() = F { node -> PyWhileStatementImpl(node) }
override val statementListConstructor: F
get() = F { node -> PyStatementListImpl(node) }
override val nonlocalStatementConstructor: F
get() = F { node -> PyNonlocalStatementImpl(node) }
override val withItemConstructor: F

View File

@@ -79,47 +79,7 @@ public class PyBaseElementImpl<T extends StubElement> extends StubBasedPsiElemen
*/
@Override
public PsiReference findReferenceAt(int offset) {
// copy/paste from SharedPsiElementImplUtil
PsiElement element = findElementAt(offset);
if (element == null || element instanceof OuterLanguageElement) return null;
offset = getTextRange().getStartOffset() + offset - element.getTextRange().getStartOffset();
List<PsiReference> referencesList = new ArrayList<>();
final PsiFile file = element.getContainingFile();
final var context =
file != null ? TypeEvalContext.codeAnalysis(file.getProject(), file) : TypeEvalContext.codeInsightFallback(element.getProject());
final PyResolveContext resolveContext = PyResolveContext.implicitContext(context);
while (element != null) {
addReferences(offset, element, referencesList, resolveContext);
offset = element.getStartOffsetInParent() + offset;
if (element instanceof PsiFile) break;
element = element.getParent();
}
if (referencesList.isEmpty()) return null;
if (referencesList.size() == 1) return referencesList.get(0);
return new PsiMultiReference(referencesList.toArray(PsiReference.EMPTY_ARRAY),
referencesList.get(referencesList.size() - 1).getElement());
}
private static void addReferences(int offset, PsiElement element, final Collection<PsiReference> outReferences,
PyResolveContext resolveContext) {
final PsiReference[] references;
if (element instanceof PyReferenceOwner owner) {
final PsiPolyVariantReference reference = owner.getReference(resolveContext);
references = new PsiReference[]{reference};
}
else {
references = element.getReferences();
}
for (final PsiReference reference : references) {
for (TextRange range : ReferenceRange.getRanges(reference)) {
assert range != null : reference;
if (range.containsOffset(offset)) {
outReferences.add(reference);
}
}
}
return findReferenceAt(this, offset);
}
@Nullable
@@ -140,4 +100,50 @@ public class PyBaseElementImpl<T extends StubElement> extends StubBasedPsiElemen
public <E extends PsiElement> @Nullable E getStubOrPsiParentOfType(@NotNull Class<E> parentClass) {
return super.getStubOrPsiParentOfType(parentClass);
}
/**
 * Finds the reference located at {@code offset} inside {@code target}.
 * <p>
 * Starts at the leaf found at the offset and walks up through its parents (stopping after the
 * containing {@link PsiFile}, or when there is no parent), collecting every reference whose
 * range contains the offset translated to the element currently visited.
 *
 * @param target element the offset is relative to
 * @param offset offset inside {@code target}
 * @return {@code null} if there is no leaf at the offset, the leaf is an
 * {@link OuterLanguageElement}, or nothing was collected; the single reference if exactly one
 * was collected; otherwise a {@link PsiMultiReference} over all of them, bound to the element
 * of the last collected reference
 */
@Nullable
static PsiReference findReferenceAt(@NotNull PsiElement target, int offset) {
  final PsiElement leaf = target.findElementAt(offset);
  if (leaf == null || leaf instanceof OuterLanguageElement) {
    return null;
  }

  // Offset re-expressed relative to the element being visited; starts at the leaf.
  int relativeOffset = target.getTextRange().getStartOffset() + offset - leaf.getTextRange().getStartOffset();

  final PsiFile containingFile = leaf.getContainingFile();
  final TypeEvalContext typeEvalContext;
  if (containingFile != null) {
    typeEvalContext = TypeEvalContext.codeAnalysis(containingFile.getProject(), containingFile);
  }
  else {
    typeEvalContext = TypeEvalContext.codeInsightFallback(leaf.getProject());
  }
  final PyResolveContext resolveContext = PyResolveContext.implicitContext(typeEvalContext);

  final List<PsiReference> collected = new ArrayList<>();
  for (PsiElement current = leaf; current != null; current = current.getParent()) {
    addReferences(relativeOffset, current, collected, resolveContext);
    // Shift the offset into the parent's coordinates before moving up.
    relativeOffset += current.getStartOffsetInParent();
    if (current instanceof PsiFile) {
      break;
    }
  }

  if (collected.isEmpty()) {
    return null;
  }
  if (collected.size() == 1) {
    return collected.get(0);
  }
  final PsiReference[] asArray = collected.toArray(PsiReference.EMPTY_ARRAY);
  final PsiReference last = collected.get(collected.size() - 1);
  return new PsiMultiReference(asArray, last.getElement());
}
/**
 * Appends to {@code outReferences} the references of {@code element} that cover {@code offset}.
 * <p>
 * For a {@link PyReferenceOwner} the single reference obtained with {@code resolveContext} is
 * examined; for any other element, everything returned by {@link PsiElement#getReferences()}.
 * A reference is appended once for each of its ranges that contains the offset.
 *
 * @param offset         offset to test against the reference ranges
 * @param element        element whose references are examined
 * @param outReferences  collection receiving the matching references
 * @param resolveContext context used to obtain the reference of a {@link PyReferenceOwner}
 */
private static void addReferences(int offset,
                                  @NotNull PsiElement element,
                                  @NotNull final Collection<PsiReference> outReferences,
                                  @NotNull PyResolveContext resolveContext) {
  final PsiReference[] candidates = element instanceof PyReferenceOwner owner
                                    ? new PsiReference[]{owner.getReference(resolveContext)}
                                    : element.getReferences();
  for (final PsiReference candidate : candidates) {
    for (TextRange candidateRange : ReferenceRange.getRanges(candidate)) {
      assert candidateRange != null : candidate;
      if (!candidateRange.containsOffset(offset)) {
        continue;
      }
      outReferences.add(candidate);
    }
  }
}
}

View File

@@ -0,0 +1,47 @@
package com.jetbrains.python.psi.impl;
import com.intellij.psi.PsiElementVisitor;
import com.intellij.psi.PsiReference;
import com.intellij.psi.impl.source.tree.LazyParseablePsiElement;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PythonFileType;
import com.jetbrains.python.PythonLanguage;
import com.jetbrains.python.psi.PyElement;
import com.jetbrains.python.psi.PyElementVisitor;
import com.jetbrains.python.psi.PyUtil;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * A {@link LazyParseablePsiElement} that also acts as a {@link PyElement}: it dispatches to
 * {@link PyElementVisitor}s, reports the Python language and resolves references at an offset
 * through {@link PyBaseElementImpl#findReferenceAt(com.intellij.psi.PsiElement, int)}.
 */
public class PyLazyParseablePsiElement extends LazyParseablePsiElement implements PyElement {

  public PyLazyParseablePsiElement(@NotNull IElementType type, @Nullable CharSequence buffer) {
    super(type, buffer);
  }

  /**
   * Routes Python visitors to {@link #acceptPyVisitor(PyElementVisitor)}; every other visitor
   * is handled by the superclass. The validity assertion runs only in verbose mode.
   */
  @Override
  public void accept(@NotNull PsiElementVisitor visitor) {
    PyUtil.verboseOnly(() -> PyPsiUtils.assertValid(this));
    if (!(visitor instanceof PyElementVisitor)) {
      super.accept(visitor);
      return;
    }
    acceptPyVisitor((PyElementVisitor)visitor);
  }

  /**
   * Visitor hook for subclasses; by default the element is visited as a generic Python element.
   */
  protected void acceptPyVisitor(PyElementVisitor pyVisitor) {
    pyVisitor.visitPyElement(this);
  }

  /**
   * @return the language of {@link PythonFileType#INSTANCE}, cast to {@link PythonLanguage}
   */
  @Override
  @NotNull
  public PythonLanguage getLanguage() {
    final var fileTypeLanguage = PythonFileType.INSTANCE.getLanguage();
    return (PythonLanguage)fileTypeLanguage;
  }

  /**
   * Delegates to the lookup shared with {@link PyBaseElementImpl}.
   */
  @Override
  public PsiReference findReferenceAt(int offset) {
    return PyBaseElementImpl.findReferenceAt(this, offset);
  }
}

View File

@@ -0,0 +1,172 @@
package com.jetbrains.python.psi.impl;
import com.intellij.lang.ASTNode;
import com.intellij.lang.Language;
import com.intellij.lang.PsiBuilder;
import com.intellij.lang.PsiBuilderFactory;
import com.intellij.lexer.Lexer;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.project.Project;
import com.intellij.openapi.util.Key;
import com.intellij.openapi.util.registry.Registry;
import com.intellij.psi.PsiElement;
import com.intellij.psi.PsiErrorElement;
import com.intellij.psi.PsiFile;
import com.intellij.psi.tree.ICompositeElementType;
import com.intellij.util.containers.ContainerUtil;
import com.jetbrains.python.PyTokenTypes;
import com.jetbrains.python.PythonLanguage;
import com.jetbrains.python.lexer.PythonIndentingLexer;
import com.jetbrains.python.lexer.PythonIndentingLexerForLazyElements;
import com.jetbrains.python.parsing.PyLazyParser;
import com.jetbrains.python.psi.LanguageLevel;
import com.jetbrains.python.psi.PyFile;
import com.jetbrains.python.psi.PyIndentUtil;
import com.jetbrains.python.psi.PyReparseableElementType;
import org.jetbrains.annotations.NotNull;
/**
 * Reparseable element type for Python statement lists.
 * <p>
 * {@link #isReparseable} decides whether changed text may be reparsed in place,
 * {@link #isValidReparse} prepares and validates the freshly created node, and
 * {@link #doParseContents} parses the text of a lazy node with an indent-aware lexer.
 */
public class PyStatementListElementType extends PyReparseableElementType implements ICompositeElementType {
public PyStatementListElementType() {
super("PyStatementList");
}
// The logger is intentionally created for the parent class, hence the suppression.
@SuppressWarnings("LoggerInitializedWithForeignClass")
private static final Logger LOG = Logger.getInstance(PyReparseableElementType.class);
// User-data keys written onto the new node in isValidReparse() and read back in doParseContents().
public static final Key<LanguageLevel> LANGUAGE_LEVEL_KEY = Key.create("LANGUAGE_LEVEL_FOR_REPARSEABLE_ELEMENT");
public static final Key<Integer> BASE_INDENT_KEY = Key.create("FIRST_LINE_INDENT_FOR_REPARSEABLE_ELEMENT");
/**
 * Tells whether {@code newText} may replace the text of {@code currentNode} via incremental reparse.
 * <p>
 * The answer is {@code false} when the {@code python.statement.lists.incremental.reparse} registry
 * key is off, when the new text is empty, when the file language is not {@link PythonLanguage#INSTANCE},
 * or when the current statement list has a {@link PsiErrorElement} among its children. Otherwise the
 * result of {@link #checkIndentDedentBalanceWithLexer} on the new text is returned.
 *
 * @param currentNode  node of the existing statement list; cast to {@link PyStatementListImpl}
 * @param newText      candidate text for the statement list
 * @param fileLanguage language of the containing file
 * @param project      current project (not used by this implementation)
 */
@Override
public boolean isReparseable(@NotNull ASTNode currentNode,
@NotNull CharSequence newText,
@NotNull Language fileLanguage,
@NotNull Project project) {
if (!Registry.is("python.statement.lists.incremental.reparse")) {
return false;
}
// Guarded because the message includes the full old and new text.
if (LOG.isDebugEnabled()) {
LOG.debug("Attempting to reparse lazy element of type " + this
+ "\nparent: " + currentNode.getTreeParent()
+ "\nold text: \n" + currentNode.getText()
+ "\n\nnew text: \n" + newText);
}
if (newText.isEmpty() || !fileLanguage.is(PythonLanguage.INSTANCE)) { // do not reparse Cython statement lists
return false;
}
// Error elements on top level of previous statement list may cause some errors to remain in the PSI tree after reparse
boolean parentContainsErrors =
ContainerUtil.findInstance(((PyStatementListImpl)currentNode).getChildren(), PsiErrorElement.class) != null;
if (parentContainsErrors) {
LOG.debug("Previous node contains PsiErrorElement, reparse is declined");
return false;
}
// A statement list that follows a colon on the same line is lexed with an empty base indent.
boolean isAfterColonOnSameLine = isAfterColonOnSameLine((PyStatementListImpl)currentNode);
String firstLineIndent = isAfterColonOnSameLine ? "" : PyIndentUtil.getElementIndent(currentNode.getPsi());
PythonIndentingLexerForLazyElements lexer = new PythonIndentingLexerForLazyElements(firstLineIndent.length());
return checkIndentDedentBalanceWithLexer(newText, lexer, isAfterColonOnSameLine);
}
/**
 * @return true if the previous non-whitespace sibling on the same line exists and is a colon token
 */
private static boolean isAfterColonOnSameLine(PsiElement currentNode) {
PsiElement prevSibling = PyPsiUtils.getPrevNonWhitespaceSiblingOnSameLine(currentNode);
return prevSibling != null && prevSibling.getNode().getElementType() == PyTokenTypes.COLON;
}
/**
 * Checks the balance between the number of INDENT and DEDENT tokens in the given text using the provided lexer.
 * Negative balance immediately indicates that given statement list is no longer reparseable.
 * Any positive balance or zero may be incorrect, but does not make a new statement list unparseable.
 * <p>
 * The balance is tested after every token, so with a starting balance of -1 the very first token
 * has to be an INDENT, otherwise the check fails at once. A text that yields no tokens at all is
 * accepted regardless of the starting balance.
 *
 * @param text the input text to check
 * @param lexer the lexer to use for tokenizing the text
 * @param isOnTheSameLine when true the balance starts at 0, otherwise at -1
 * @return true if the balance was never negative after a token was counted, otherwise - false
 */
public static boolean checkIndentDedentBalanceWithLexer(@NotNull CharSequence text, @NotNull Lexer lexer, boolean isOnTheSameLine) {
lexer.start(text);
int balance = isOnTheSameLine ? 0 : -1;
while (lexer.getTokenType() != null) {
if (lexer.getTokenType() == PyTokenTypes.INDENT) {
balance++;
}
else if (lexer.getTokenType() == PyTokenTypes.DEDENT) {
balance--;
}
if (balance < 0) {
LOG.debug("Indent/Dedent balance is negative, incremental reparse declined");
return false;
}
lexer.advance();
}
return true; // positive balance is safe
}
/**
 * Prepares {@code newNode} for parsing and reports whether the reparse produced any content.
 * <p>
 * The language level of the containing file and the base indent length of the old statement list
 * are stored on {@code newNode} as user data.
 *
 * @param oldNode node being replaced; supplies the containing file and the base indent
 * @param newNode freshly created node that receives the user data
 * @return false if the containing file is not a {@link PyFile} or {@code newNode} has no first
 * child, true otherwise
 */
@Override
public boolean isValidReparse(@NotNull ASTNode oldNode, @NotNull ASTNode newNode) {
PsiFile file = oldNode.getPsi().getContainingFile();
String firstLineIndent = isAfterColonOnSameLine(oldNode.getPsi()) ? "" : PyIndentUtil.getElementIndent(oldNode.getPsi());
if (!(file instanceof PyFile)) return false;
LanguageLevel languageLevel = LanguageLevel.forElement(file);
newNode.putUserData(LANGUAGE_LEVEL_KEY, languageLevel);
newNode.putUserData(BASE_INDENT_KEY, firstLineIndent.length());
// NOTE(review): the user data above is stored before the first child is requested; presumably
// that request triggers doParseContents(), which reads both keys - keep this order.
ASTNode tmp = newNode.getFirstChildNode();
if (tmp == null) {
return false;
}
LOG.debug("Element of type " + this + " reparsed successfully");
return true;
}
/**
 * Parses the text of {@code chameleon} as a statement list.
 * <p>
 * The base indent is read from {@link #BASE_INDENT_KEY} and asserted to be present; the language
 * level is read from {@link #LANGUAGE_LEVEL_KEY} and falls back to {@link LanguageLevel#getDefault()}.
 *
 * @param chameleon lazy node whose text is parsed; its tree parent must have a PSI element
 * @param psi       not used by this implementation
 * @return the node produced by {@link PyLazyParser#parseLazyElement}
 */
@Override
protected ASTNode doParseContents(@NotNull ASTNode chameleon, @NotNull PsiElement psi) {
PsiElement parentPsiElement = chameleon.getTreeParent().getPsi();
assert parentPsiElement != null : "parent psi is null: " + chameleon;
Integer indent = chameleon.getUserData(BASE_INDENT_KEY);
assert indent != null;
LanguageLevel languageLevel = chameleon.getUserData(LANGUAGE_LEVEL_KEY);
if (languageLevel == null) {
languageLevel = LanguageLevel.getDefault();
}
PythonIndentingLexer lexer = new PythonIndentingLexerForLazyElements(indent.intValue());
LOG.debug("Performing lazy reparse for element of type " + this);
final PsiBuilder builder = createBuilder(parentPsiElement, chameleon, lexer);
final PyLazyParser parser = new PyLazyParser();
parser.setLanguageLevel(languageLevel);
return parser.parseLazyElement(this, builder, languageLevel, PyLazyParser::parseStatementList);
}
/**
 * Creates a {@link PsiBuilder} over the chameleon's characters for {@link PythonLanguage#INSTANCE},
 * using the project of {@code parentPsi} and the given lexer.
 */
@NotNull
private static PsiBuilder createBuilder(@NotNull PsiElement parentPsi, @NotNull ASTNode chameleon, @NotNull Lexer lexer) {
Language languageForParser = PythonLanguage.INSTANCE;
return PsiBuilderFactory.getInstance().createBuilder(parentPsi.getProject(), chameleon, lexer, languageForParser, chameleon.getChars());
}
/**
 * Creates a statement list node backed by the given text.
 */
@Override
public ASTNode createNode(CharSequence text) {
return new PyStatementListImpl(text);
}
/**
 * Creates a statement list node of this type with no text buffer.
 */
@Override
public @NotNull ASTNode createCompositeNode() {
return new PyStatementListImpl(this, null);
}
@Override
public String toString() {
return "PyStatementList";
}
}

View File

@@ -5,22 +5,25 @@ import com.intellij.lang.ASTFactory;
import com.intellij.lang.ASTNode;
import com.intellij.psi.PsiElement;
import com.intellij.psi.TokenType;
import com.intellij.psi.impl.source.tree.TreeElement;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PyElementTypes;
import com.jetbrains.python.PythonDialectsTokenSetProvider;
import com.jetbrains.python.psi.PyElementGenerator;
import com.jetbrains.python.psi.PyElementVisitor;
import com.jetbrains.python.psi.PyStatement;
import com.jetbrains.python.psi.PyStatementList;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class PyStatementListImpl extends PyElementImpl implements PyStatementList {
public PyStatementListImpl(ASTNode astNode) {
super(astNode);
public class PyStatementListImpl extends PyLazyParseablePsiElement implements PyStatementList {
public PyStatementListImpl(@NotNull IElementType type, @Nullable CharSequence buffer) {
super(type, buffer);
}
@Override
protected void acceptPyVisitor(PyElementVisitor pyVisitor) {
pyVisitor.visitPyStatementList(this);
public PyStatementListImpl(@Nullable CharSequence buffer) {
super(PyElementTypes.STATEMENT_LIST, buffer);
}
@Override
@@ -29,7 +32,7 @@ public class PyStatementListImpl extends PyElementImpl implements PyStatementLis
}
@Override
public ASTNode addInternal(ASTNode first, ASTNode last, ASTNode anchor, Boolean before) {
public TreeElement addInternal(TreeElement first, ASTNode last, ASTNode anchor, Boolean before) {
if (first.getPsi() instanceof PyStatement && getStatements().length == 1) {
ASTNode treePrev = getNode().getTreePrev();
if (treePrev != null && treePrev.getElementType() == TokenType.WHITE_SPACE && !treePrev.textContains('\n')) {
@@ -49,4 +52,9 @@ public class PyStatementListImpl extends PyElementImpl implements PyStatementLis
}
super.deleteChildInternal(child);
}
}
@Override
public String toString() {
return "PyStatementList";
}
}