IJPL-198476 Performance regression in spellcheck inspections

Merge-request: IJ-MR-170756
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>


Merge-request: IJ-MR-170896
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>

GitOrigin-RevId: 5b03c5ba4354ee4733447bf95ee6554592f0e79b
This commit is contained in:
Ilia Permiashkin
2025-07-31 11:12:57 +00:00
committed by intellij-monorepo-bot
parent bc22f9b8f8
commit fd075ea309
10 changed files with 2262 additions and 6 deletions

View File

@@ -20,9 +20,7 @@ import java.util.regex.Pattern;
*/
public class JsonSpellcheckerStrategy extends SpellcheckingStrategy implements DumbAware {
// JSON is often deserialized to classes,
// so we consider literals that look like typical programming language identifiers to be code contexts
private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("\"([a-zA-Z][a-zA-Z0-9_]*)\"");
private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("\"" + CODE_IDENTIFIER_LIKE + "\"");
private final Tokenizer<JsonStringLiteral> ourStringLiteralTokenizer = new Tokenizer<>() {
@Override

View File

@@ -1,7 +1,6 @@
// Copyright 2000-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.intellij.grazie.ide.language
import ai.grazie.annotation.TestOnly
import com.intellij.grazie.GrazieTestBase
import com.intellij.openapi.components.service
import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
@@ -12,7 +11,6 @@ class JSONSupportTest : GrazieTestBase() {
runHighlightTestForFile("ide/language/json/Example.json")
}
@OptIn(TestOnly::class)
fun `test json typos spellcheck performance`() {
Benchmark.newBenchmark("Highlight typos in i18n.json file") {
runHighlightTestForFile("ide/language/json/i18n.json")

View File

@@ -2,7 +2,10 @@
package com.intellij.grazie.ide.language
import com.intellij.grazie.GrazieTestBase
import com.intellij.openapi.components.service
import com.intellij.openapi.vfs.encoding.EncodingProjectManager
import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
import com.intellij.tools.ide.metrics.benchmark.Benchmark
import com.intellij.util.ui.UIUtil
import java.nio.charset.StandardCharsets
@@ -15,4 +18,13 @@ class PropertiesSupportTest : GrazieTestBase() {
UIUtil.dispatchAllInvocationEvents()
runHighlightTestForFile("ide/language/properties/Example.properties")
}
/**
 * Benchmark: highlights typos in `ide/language/properties/i18n.properties`.
 *
 * The setup step drops the PSI caches and the spell checker engine's suggestion cache
 * (presumably so that measured runs do not reuse earlier results; confirm against the Benchmark API).
 */
fun `test properties typos spellcheck performance`() {
  val benchmark = Benchmark.newBenchmark("Highlight typos in i18n.properties file") {
    runHighlightTestForFile("ide/language/properties/i18n.properties")
  }
  val prepared = benchmark.setup {
    psiManager.dropPsiCaches()
    val engine = project.service<GrazieSpellCheckerEngine>()
    engine.dropSuggestionCache()
  }
  prepared.start()
}
}

View File

@@ -4,6 +4,9 @@ package com.intellij.grazie.ide.language
import com.intellij.grazie.GrazieTestBase
import com.intellij.grazie.text.TextContent
import com.intellij.grazie.text.TextExtractor
import com.intellij.openapi.components.service
import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
import com.intellij.tools.ide.metrics.benchmark.Benchmark
class YamlSupportTest : GrazieTestBase() {
fun `test grammar check in yaml file`() {
@@ -14,4 +17,13 @@ class YamlSupportTest : GrazieTestBase() {
val file = myFixture.configureByText("a.yaml", "foo: 'bar'")
assertEquals("bar", TextExtractor.findTextAt(file, 6, TextContent.TextDomain.ALL).toString())
}
/**
 * Benchmark: highlights typos in `ide/language/yaml/i18n.yaml`.
 *
 * The setup step drops the PSI caches and the spell checker engine's suggestion cache
 * (presumably so that measured runs do not reuse earlier results; confirm against the Benchmark API).
 */
fun `test yaml typos spellcheck performance`() {
  val benchmark = Benchmark.newBenchmark("Highlight typos in i18n.yaml file") {
    runHighlightTestForFile("ide/language/yaml/i18n.yaml")
  }
  val prepared = benchmark.setup {
    psiManager.dropPsiCaches()
    val engine = project.service<GrazieSpellCheckerEngine>()
    engine.dropSuggestionCache()
  }
  prepared.start()
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -39,6 +39,11 @@ final class PropertiesSpellcheckingStrategy extends SpellcheckingStrategy implem
return super.getTokenizer(element);
}
/**
 * Reports an element as a literal whenever the superclass does not classify it as a comment.
 *
 * @param element the PSI element to classify
 * @return {@code true} unless {@code super.isComment(element)} holds
 */
@Override
protected boolean isLiteral(@NotNull PsiElement element) {
  boolean comment = super.isComment(element);
  return !comment;
}
private static class PropertyKeyTokenizer extends TokenizerBase<PropertyKeyImpl> {
private PropertyKeyTokenizer() {
super(PropertiesSplitter.getInstance());

View File

@@ -20,8 +20,12 @@ import org.jetbrains.yaml.psi.YAMLKeyValue;
import org.jetbrains.yaml.psi.YAMLQuotedText;
import org.jetbrains.yaml.psi.YAMLScalar;
import java.util.regex.Pattern;
final class YAMLSpellcheckerStrategy extends SpellcheckingStrategy implements DumbAware {
private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("[\"']?" + CODE_IDENTIFIER_LIKE + "[\"']?");
private final Tokenizer<YAMLQuotedText> myQuotedTextTokenizer = new Tokenizer<>() {
@Override
public void tokenize(@NotNull YAMLQuotedText element, @NotNull TokenConsumer consumer) {
@@ -68,6 +72,11 @@ final class YAMLSpellcheckerStrategy extends SpellcheckingStrategy implements Du
return super.getTokenizer(element);
}
/**
 * Decides whether an element is treated as a literal.
 * <p>
 * An element is a literal when the superclass already says so; otherwise it is a literal only if it is
 * not a comment and its text does not match {@code CODE_LIKE_PATTERN} (an optionally quoted,
 * identifier-like token).
 *
 * @param element the PSI element to classify
 * @return {@code true} if the element is treated as a literal
 */
@Override
protected boolean isLiteral(@NotNull PsiElement element) {
  if (super.isLiteral(element)) {
    return true;
  }
  if (super.isComment(element)) {
    return false;
  }
  // Identifier-like text is not reported as a literal.
  boolean looksLikeCode = CODE_LIKE_PATTERN.matcher(element.getText()).matches();
  return !looksLikeCode;
}
private static class JsonSchemaSpellcheckerClientForYaml extends JsonSchemaSpellcheckerClient {
private final @NotNull PsiElement element;

View File

@@ -3,7 +3,6 @@
package com.intellij.spellchecker.grazie
import ai.grazie.annotation.TestOnly
import ai.grazie.nlp.langs.Language
import ai.grazie.nlp.langs.alphabet.Alphabet
import ai.grazie.nlp.phonetics.metaphone.DoubleMetaphone
@@ -43,6 +42,7 @@ import com.intellij.spellchecker.grazie.dictionary.ExtendedWordListWithFrequency
import com.intellij.spellchecker.grazie.dictionary.WordListAdapter
import com.intellij.spellchecker.grazie.ranker.DiacriticSuggestionRanker
import kotlinx.coroutines.*
import org.jetbrains.annotations.TestOnly
private const val MAX_WORD_LENGTH = 32

View File

@@ -29,6 +29,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
/**
* Defines spellchecking support for a custom language.
@@ -39,6 +40,9 @@ import java.util.Set;
* Mark your strategy as {@link com.intellij.openapi.project.DumbAware} if it does not need indexes to perform
*/
public class SpellcheckingStrategy implements PossiblyDumbAware {
// Consider literals that look like typical programming language identifiers to be code contexts.
// The regex matches an ASCII letter followed by any number of ASCII letters, digits, or underscores.
// Language strategies build larger patterns by concatenating this constant with strings, which
// relies on Pattern.toString() returning the source regex text.
protected static final Pattern CODE_IDENTIFIER_LIKE = Pattern.compile("([a-zA-Z][a-zA-Z0-9_]*)");
protected final Tokenizer<PsiComment> myCommentTokenizer = new CommentTokenizer();
public static final ExtensionPointName<KeyedLazyInstance<SpellcheckingStrategy>> EP_NAME =