[grazie] speed up text extraction on large broken XMLs

GitOrigin-RevId: 7d52d3c86ed284e8aa704a29fd64cf20444b9245
This commit is contained in:
Peter Gromov
2022-05-30 13:10:11 +02:00
committed by intellij-monorepo-bot
parent 1b21925d46
commit 1ac04d4675
2 changed files with 32 additions and 8 deletions

View File

@@ -213,7 +213,25 @@ public class TextExtractionTest extends BasePlatformTestCase {
myFixture.type(' ');
PsiDocumentManager.getInstance(getProject()).commitAllDocuments(); // drop file caches
})
.usesAllCPUCores()
.assertTiming();
}
/**
 * Performance check for text extraction on a large XML file whose
 * {@code <!DOCTYPE} declaration is opened but never closed.
 * <p>
 * The fixture text consists of the unterminated DOCTYPE header followed by
 * 10,000 generated {@code <tag>} elements. The measured body calls
 * {@link TextExtractor#findTextsAt} for every element yielded by the PSI traverser of the file.
 * The value {@code 1_500} is the expected-timing argument passed to the performance test.
 */
public void testLargeXmlWithUnclosedDoctypePerformance() {
  // The "[" of the internal subset is never matched by "]>", which makes the document broken.
  String brokenHeader = "<!DOCTYPE rules [\n<!ENTITY some \"x\">\n<rules> ";
  String manyTags = IntStreamEx.range(0, 10_000)
    .mapToObj(i -> "<tag> content" + i + "</tag>\n")
    .joining();
  String xml = brokenHeader + manyTags + " </rules>";

  PsiFile psiFile = myFixture.configureByText("a.xml", xml);

  PlatformTestUtil
    .startPerformanceTest("text extraction", 1_500, () -> {
      // Visit each PSI element of the file and request its text contents in every domain.
      for (PsiElement psi : SyntaxTraverser.psiTraverser(psiFile)) {
        TextExtractor.findTextsAt(psi, TextContent.TextDomain.ALL);
      }
    })
    .setup(() -> {
      // Modify the document before each measured attempt.
      myFixture.type(' ');
      PsiDocumentManager.getInstance(getProject()).commitAllDocuments(); // drop file caches
    })
    .assertTiming();
}

View File

@@ -24,9 +24,7 @@ import com.intellij.psi.tree.IElementType;
import com.intellij.psi.util.CachedValueProvider;
import com.intellij.psi.util.CachedValuesManager;
import com.intellij.psi.util.PsiUtilCore;
import com.intellij.psi.xml.XmlDocument;
import com.intellij.psi.xml.XmlTag;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.psi.xml.*;
import com.intellij.util.containers.ContainerUtil;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
@@ -51,8 +49,7 @@ public class XmlTextExtractor extends TextExtractor {
@Override
protected @Nullable TextContent buildTextContent(@NotNull PsiElement element,
@NotNull Set<TextContent.TextDomain> allowedDomains) {
IElementType type = PsiUtilCore.getElementType(element);
if (isText(type) && hasSuitableDialect(element)) {
if (isText(element) && hasSuitableDialect(element)) {
var classifier = tagClassifier(element);
PsiElement container = SyntaxTraverser.psiApi().parents(element)
.find(e -> e instanceof XmlDocument || e instanceof XmlTag && classifier.apply((XmlTag)e) != TagKind.Inline);
@@ -63,6 +60,7 @@ public class XmlTextExtractor extends TextExtractor {
}
}
IElementType type = PsiUtilCore.getElementType(element);
if (type == XmlTokenType.XML_COMMENT_CHARACTERS && allowedDomains.contains(COMMENTS) && hasSuitableDialect(element)) {
return builder.build(element, COMMENTS);
}
@@ -105,7 +103,7 @@ public class XmlTextExtractor extends TextExtractor {
unknownBefore = true;
}
if (isText(PsiUtilCore.getElementType(each))) {
if (isText(each)) {
group.add(each);
}
super.visitElement(each);
@@ -138,7 +136,15 @@ public class XmlTextExtractor extends TextExtractor {
return full.excludeRange(new TextRange(range.getEndOffset(), full.length())).excludeRange(new TextRange(0, range.getStartOffset()));
}
private static boolean isText(IElementType type) {
/**
 * Tells whether the given leaf is a text-like token located in a place where text is expected.
 * <p>
 * The leaf qualifies only if its parent is an {@link XmlText}, a CDATA element directly inside
 * an {@link XmlText}, or an {@link XmlDocument}; and its own element type is whitespace,
 * a character entity reference, or data characters.
 *
 * @param leaf the PSI element to examine
 * @return {@code true} if both the parent check and the token-type check succeed
 */
private static boolean isText(PsiElement leaf) {
  PsiElement owner = leaf.getParent();

  // Cheap structural check first: reject leaves that do not sit in a text-bearing parent.
  boolean inTextualPlace =
    owner instanceof XmlText ||
    (PsiUtilCore.getElementType(owner) == XmlElementType.XML_CDATA && owner.getParent() instanceof XmlText) ||
    owner instanceof XmlDocument;
  if (!inTextualPlace) {
    return false;
  }

  IElementType tokenType = PsiUtilCore.getElementType(leaf);
  if (tokenType == XmlTokenType.XML_WHITE_SPACE || tokenType == TokenType.WHITE_SPACE) {
    return true;
  }
  return tokenType == XmlTokenType.XML_CHAR_ENTITY_REF || tokenType == XmlTokenType.XML_DATA_CHARACTERS;
}