IJPL-198476 Performance regression in spellcheck inspections

Merge-request: IJ-MR-170756
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>
Merge-request: IJ-MR-170896
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>
GitOrigin-RevId: 5b03c5ba4354ee4733447bf95ee6554592f0e79b
2026-03-22 06:50:54 +07:00 · 2025-07-31 11:12:57 +00:00
parent bc22f9b8f8
commit fd075ea309
10 changed files with 2262 additions and 6 deletions
--- a/json/backend/src/com/intellij/json/JsonSpellcheckerStrategy.java
+++ b/json/backend/src/com/intellij/json/JsonSpellcheckerStrategy.java
@@ -20,9 +20,7 @@ import java.util.regex.Pattern;
 */
 public class JsonSpellcheckerStrategy extends SpellcheckingStrategy implements DumbAware {

-  // JSON is often deserialized to classes,
-  // so we consider literals that look like typical programming language identifier to be code contexts
-  private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("\"([a-zA-Z][a-zA-Z0-9_]*)\"");
+  private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("\"" + CODE_IDENTIFIER_LIKE + "\"");

  private final Tokenizer<JsonStringLiteral> ourStringLiteralTokenizer = new Tokenizer<>() {
    @Override
--- a/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/JSONSupportTest.kt
+++ b/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/JSONSupportTest.kt
@@ -1,7 +1,6 @@
 // Copyright 2000-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
 package com.intellij.grazie.ide.language

-import ai.grazie.annotation.TestOnly
 import com.intellij.grazie.GrazieTestBase
 import com.intellij.openapi.components.service
 import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
@@ -12,7 +11,6 @@ class JSONSupportTest : GrazieTestBase() {
    runHighlightTestForFile("ide/language/json/Example.json")
  }

-  @OptIn(TestOnly::class)
  fun `test json typos spellcheck performance`() {
    Benchmark.newBenchmark("Highlight typos in i18n.json file") {
      runHighlightTestForFile("ide/language/json/i18n.json")
--- a/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/PropertiesSupportTest.kt
+++ b/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/PropertiesSupportTest.kt
@@ -2,7 +2,10 @@
 package com.intellij.grazie.ide.language

 import com.intellij.grazie.GrazieTestBase
+import com.intellij.openapi.components.service
 import com.intellij.openapi.vfs.encoding.EncodingProjectManager
+import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
+import com.intellij.tools.ide.metrics.benchmark.Benchmark
 import com.intellij.util.ui.UIUtil
 import java.nio.charset.StandardCharsets

@@ -15,4 +18,13 @@ class PropertiesSupportTest : GrazieTestBase() {
    UIUtil.dispatchAllInvocationEvents()
    runHighlightTestForFile("ide/language/properties/Example.properties")
  }
+
+  fun `test properties typos spellcheck performance`() {
+    Benchmark.newBenchmark("Highlight typos in i18n.properties file") {
+      runHighlightTestForFile("ide/language/properties/i18n.properties")
+    }.setup {
+      psiManager.dropPsiCaches()
+      project.service<GrazieSpellCheckerEngine>().dropSuggestionCache()
+    }.start()
+  }
 }
--- a/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/YamlSupportTest.kt
+++ b/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/YamlSupportTest.kt
@@ -4,6 +4,9 @@ package com.intellij.grazie.ide.language
 import com.intellij.grazie.GrazieTestBase
 import com.intellij.grazie.text.TextContent
 import com.intellij.grazie.text.TextExtractor
+import com.intellij.openapi.components.service
+import com.intellij.spellchecker.grazie.GrazieSpellCheckerEngine
+import com.intellij.tools.ide.metrics.benchmark.Benchmark

 class YamlSupportTest : GrazieTestBase() {
  fun `test grammar check in yaml file`() {
@@ -14,4 +17,13 @@ class YamlSupportTest : GrazieTestBase() {
    val file = myFixture.configureByText("a.yaml", "foo: 'bar'")
    assertEquals("bar", TextExtractor.findTextAt(file, 6, TextContent.TextDomain.ALL).toString())
  }
+
+  fun `test yaml typos spellcheck performance`() {
+    Benchmark.newBenchmark("Highlight typos in i18n.yaml file") {
+      runHighlightTestForFile("ide/language/yaml/i18n.yaml")
+    }.setup {
+      psiManager.dropPsiCaches()
+      project.service<GrazieSpellCheckerEngine>().dropSuggestionCache()
+    }.start()
+  }
 }
--- a/plugins/grazie/src/test/testData/ide/language/properties/i18n.properties
+++ b/plugins/grazie/src/test/testData/ide/language/properties/i18n.properties
--- a/plugins/grazie/src/test/testData/ide/language/yaml/i18n.yaml
+++ b/plugins/grazie/src/test/testData/ide/language/yaml/i18n.yaml
--- a/plugins/properties/src/com/intellij/lang/properties/spellchecker/PropertiesSpellcheckingStrategy.java
+++ b/plugins/properties/src/com/intellij/lang/properties/spellchecker/PropertiesSpellcheckingStrategy.java
@@ -39,6 +39,11 @@ final class PropertiesSpellcheckingStrategy extends SpellcheckingStrategy implem
    return super.getTokenizer(element);
  }

+  @Override
+  protected boolean isLiteral(@NotNull PsiElement element) {
+    return !super.isComment(element);
+  }
+
  private static class PropertyKeyTokenizer extends TokenizerBase<PropertyKeyImpl> {
    private PropertyKeyTokenizer() {
      super(PropertiesSplitter.getInstance());
--- a/plugins/yaml/backend/src/YAMLSpellcheckerStrategy.java
+++ b/plugins/yaml/backend/src/YAMLSpellcheckerStrategy.java
@@ -20,8 +20,12 @@ import org.jetbrains.yaml.psi.YAMLKeyValue;
 import org.jetbrains.yaml.psi.YAMLQuotedText;
 import org.jetbrains.yaml.psi.YAMLScalar;

+import java.util.regex.Pattern;
+
 final class YAMLSpellcheckerStrategy extends SpellcheckingStrategy implements DumbAware {

+  private static final Pattern CODE_LIKE_PATTERN = Pattern.compile("[\"']?" + CODE_IDENTIFIER_LIKE + "[\"']?");
+
  private final Tokenizer<YAMLQuotedText> myQuotedTextTokenizer = new Tokenizer<>() {
    @Override
    public void tokenize(@NotNull YAMLQuotedText element, @NotNull TokenConsumer consumer) {
@@ -68,6 +72,11 @@ final class YAMLSpellcheckerStrategy extends SpellcheckingStrategy implements Du
    return super.getTokenizer(element);
  }

+  @Override
+  protected boolean isLiteral(@NotNull PsiElement element) {
+    return super.isLiteral(element) || !super.isComment(element) && !CODE_LIKE_PATTERN.matcher(element.getText()).matches();
+  }
+
  private static class JsonSchemaSpellcheckerClientForYaml extends JsonSchemaSpellcheckerClient {
    private final @NotNull PsiElement element;

--- a/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt
+++ b/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt
@@ -3,7 +3,6 @@

 package com.intellij.spellchecker.grazie

-import ai.grazie.annotation.TestOnly
 import ai.grazie.nlp.langs.Language
 import ai.grazie.nlp.langs.alphabet.Alphabet
 import ai.grazie.nlp.phonetics.metaphone.DoubleMetaphone
@@ -43,6 +42,7 @@ import com.intellij.spellchecker.grazie.dictionary.ExtendedWordListWithFrequency
 import com.intellij.spellchecker.grazie.dictionary.WordListAdapter
 import com.intellij.spellchecker.grazie.ranker.DiacriticSuggestionRanker
 import kotlinx.coroutines.*
+import org.jetbrains.annotations.TestOnly

 private const val MAX_WORD_LENGTH = 32

--- a/spellchecker/src/com/intellij/spellchecker/tokenizer/SpellcheckingStrategy.java
+++ b/spellchecker/src/com/intellij/spellchecker/tokenizer/SpellcheckingStrategy.java
@@ -29,6 +29,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Pattern;

 /**
 * Defines spellchecking support for a custom language.
@@ -39,6 +40,9 @@ import java.util.Set;
 * Mark your strategy as {@link com.intellij.openapi.project.DumbAware} if it does not need indexes to perform
 */
 public class SpellcheckingStrategy implements PossiblyDumbAware {
+  // Treat literals that look like typical programming-language identifiers as code contexts
+  protected static final Pattern CODE_IDENTIFIER_LIKE = Pattern.compile("([a-zA-Z][a-zA-Z0-9_]*)");
+
  protected final Tokenizer<PsiComment> myCommentTokenizer = new CommentTokenizer();

  public static final ExtensionPointName<KeyedLazyInstance<SpellcheckingStrategy>> EP_NAME =