IJPL-36149 Spell-checker treats HTML text as code

Co-authored-by: Peter Gromov <peter.gromov@jetbrains.com>
Merge-request: IJ-MR-164808
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>
(cherry picked from commit 3515defd096aaf041b719fff47c7a18d539f93c4)
GitOrigin-RevId: c4ce65568e8d28873e10add0fb4f13b62db8e548
2025-12-15 02:59:33 +07:00 · 2025-06-17 14:30:50 +00:00
parent 791ef44df3
commit 1625bc3b08
10 changed files with 117 additions and 22 deletions
--- a/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/XMLSupportTest.kt
+++ b/plugins/grazie/src/test/kotlin/com/intellij/grazie/ide/language/XMLSupportTest.kt
@@ -17,6 +17,14 @@ class XMLSupportTest : GrazieTestBase() {
    runHighlightTestForFile("ide/language/xml/Example.xml")
  }

+  fun `test typo checks and self references xml file`() {
+    runHighlightTestForFile("ide/language/xml/SelfReferenceExample.xml")
+  }
+
+  fun `test typo checks when comments are before root tag xml file`() {
+    runHighlightTestForFile("ide/language/xml/Comment.xml")
+  }
+
  fun `test no grammar checks in svg file`() {
    runHighlightTestForFile("ide/language/xml/Example.svg")
  }
@@ -26,4 +34,11 @@ class XMLSupportTest : GrazieTestBase() {
    runHighlightTestForFile("ide/language/xml/Example.html")
  }

+  fun `test typo checks and self references html file`() {
+    runHighlightTestForFile("ide/language/xml/SelfReferenceExample.html")
+  }
+
+  fun `test typo checks when comments are before root tag html file`() {
+    runHighlightTestForFile("ide/language/xml/Comment.html")
+  }
 }
--- a/plugins/grazie/src/test/testData/ide/language/xml/Comment.html
+++ b/plugins/grazie/src/test/testData/ide/language/xml/Comment.html
@@ -0,0 +1,3 @@
+<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
--- a/plugins/grazie/src/test/testData/ide/language/xml/Comment.xml
+++ b/plugins/grazie/src/test/testData/ide/language/xml/Comment.xml
@@ -0,0 +1,4 @@
+<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
+<unterqueren>Not a typo</unterqueren>
--- a/plugins/grazie/src/test/testData/ide/language/xml/Example.html
+++ b/plugins/grazie/src/test/testData/ide/language/xml/Example.html
@@ -46,5 +46,9 @@ actually return values.
 <p>Use the <b>Ignore single field static imports</b> checkbox to ignore single-field <code>import static</code> statements.</p>
 <p>There are several reasons synchronization on <code>this</code> or <code>class</code> expressions may be a bad idea:</p>

+<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
+
 </body>
 </html>
--- a/plugins/grazie/src/test/testData/ide/language/xml/Example.xml
+++ b/plugins/grazie/src/test/testData/ide/language/xml/Example.xml
@@ -20,4 +20,9 @@
    There is a <GRAMMAR_ERROR descr="POSSIBILTY_POSSIBLE">possibility</GRAMMAR_ERROR> of such thing so that this is possible.
  </description-single-sentence
  >
+
+  <someTag withId="<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>"/>
+  <!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
+  <!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
+  <!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
 </shiporder>
--- a/plugins/grazie/src/test/testData/ide/language/xml/SelfReferenceExample.html
+++ b/plugins/grazie/src/test/testData/ide/language/xml/SelfReferenceExample.html
@@ -0,0 +1,7 @@
+<ueberweisen>Tags are code. Sehenswürdigkeit, açúcar, <TYPO descr="Typo: In word 'acucar'">acucar</TYPO> and <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>
+  <acucar/>
+</ueberweisen>
+
+<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
+<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
--- a/plugins/grazie/src/test/testData/ide/language/xml/SelfReferenceExample.xml
+++ b/plugins/grazie/src/test/testData/ide/language/xml/SelfReferenceExample.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<shipOrder xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+           xsi:noNamespaceSchemaLocation="Example.xsd">
+
+  <ueberweisen>Tags are code. Sehenswürdigkeit, açúcar, <TYPO descr="Typo: In word 'acucar'">acucar</TYPO> and <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>
+    <acucar/>
+  </ueberweisen>
+
+  <!-- <acucar>Comment 1, ueberweisen</acucar> -->
+  <!-- <ueberweisen>Comment 2</ueberweisen> -->
+  <!-- <ueberweisen/> -->
+</shipOrder>
--- a/spellchecker/src/com/intellij/spellchecker/inspections/SpellCheckingInspection.java
+++ b/spellchecker/src/com/intellij/spellchecker/inspections/SpellCheckingInspection.java
@@ -11,7 +11,6 @@ import com.intellij.lang.refactoring.NamesValidator;
 import com.intellij.openapi.progress.ProgressManager;
 import com.intellij.openapi.project.DumbAware;
 import com.intellij.openapi.project.DumbService;
-import com.intellij.openapi.project.Project;
 import com.intellij.openapi.util.TextRange;
 import com.intellij.openapi.util.registry.Registry;
 import com.intellij.profile.codeInspection.InspectionProfileManager;
@@ -20,19 +19,16 @@ import com.intellij.psi.PsiElementVisitor;
 import com.intellij.psi.PsiFile;
 import com.intellij.psi.PsiReference;
 import com.intellij.psi.util.CachedValuesManager;
-import com.intellij.refactoring.rename.RenameUtil;
 import com.intellij.spellchecker.SpellCheckerManager;
 import com.intellij.spellchecker.tokenizer.*;
 import com.intellij.spellchecker.util.SpellCheckerBundle;
 import com.intellij.util.Consumer;
 import com.intellij.util.containers.CollectionFactory;
-import com.intellij.util.io.IOUtil;
 import com.intellij.util.text.StringSearcher;
 import org.jetbrains.annotations.NonNls;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;

-import java.text.Normalizer;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
@@ -279,15 +275,20 @@ public final class SpellCheckingInspection extends LocalInspectionTool implement
      }

      PsiFile file = myElement.getContainingFile();
-      Map<String, Boolean> referenceWords = CachedValuesManager.getProjectPsiDependentCache(file, (element) -> new ConcurrentHashMap<>());
-      return referenceWords.computeIfAbsent(word, (key) -> hasSameNamedReferenceInFile(file, key));
+      Map<String, Boolean> references = CachedValuesManager.getProjectPsiDependentCache(file, (psi) -> new ConcurrentHashMap<>());
+      return references.computeIfAbsent(word, key -> hasSameNamedReferencesInFile(file, key));
    }

-    private boolean hasSameNamedReferenceInFile(PsiFile file, String word) {
-      for (int occurrence : new StringSearcher(word, true, true).findAllOccurrences(file.getText())) {
+    private static boolean hasSameNamedReferencesInFile(PsiFile file, String word) {
+      int[] occurrences = new StringSearcher(word, true, true).findAllOccurrences(file.getText());
+      if (occurrences.length <= 1) {
+        return false;
+      }
+
+      for (int occurrence : occurrences) {
        PsiReference reference = file.findReferenceAt(occurrence);
-        PsiElement element = reference != null ? reference.resolve() : null;
-        if (reference != null && element != null && reference.getElement() != element) {
+        PsiElement resolvedReference = reference != null ? reference.resolve() : null;
+        if (reference != null && resolvedReference != null && reference.getElement() != resolvedReference) {
          return true;
        }
      }
--- a/spellchecker/src/com/intellij/spellchecker/tokenizer/SpellcheckingStrategy.java
+++ b/spellchecker/src/com/intellij/spellchecker/tokenizer/SpellcheckingStrategy.java
@@ -13,7 +13,6 @@ import com.intellij.openapi.project.PossiblyDumbAware;
 import com.intellij.openapi.util.TextRange;
 import com.intellij.psi.*;
 import com.intellij.psi.impl.source.tree.injected.InjectedLanguageUtil;
-import com.intellij.psi.tree.IElementType;
 import com.intellij.psi.util.PsiTreeUtil;
 import com.intellij.spellchecker.DictionaryLayer;
 import com.intellij.spellchecker.DictionaryLayersProvider;
@@ -94,18 +93,16 @@ public class SpellcheckingStrategy implements PossiblyDumbAware {
  }

  public boolean elementFitsScope(@NotNull PsiElement element, Set<SpellCheckingInspection.SpellCheckingScope> scope) {
-
-    final Language language = element.getLanguage();
-    final IElementType elementType = element.getNode().getElementType();
-    final ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language);
+    Language language = element.getLanguage();
+    ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language);

    if (parserDefinition != null) {
-      if (parserDefinition.getStringLiteralElements().contains(elementType)) {
+      if (isLiteral(element)) {
        if (!scope.contains(SpellCheckingInspection.SpellCheckingScope.Literals)) {
          return false;
        }
      }
-      else if (parserDefinition.getCommentTokens().contains(elementType)) {
+      else if (isComment(element)) {
        if (!scope.contains(SpellCheckingInspection.SpellCheckingScope.Comments)) {
          return false;
        }
@@ -117,6 +114,16 @@ public class SpellcheckingStrategy implements PossiblyDumbAware {
    return true;
  }

+  protected boolean isLiteral(@NotNull PsiElement psiElement) { // false when the language has no parser definition
+    ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(psiElement.getLanguage());
+    return parserDefinition != null && parserDefinition.getStringLiteralElements().contains(psiElement.getNode().getElementType());
+  }
+
+  protected boolean isComment(@NotNull PsiElement psiElement) { // false when the language has no parser definition
+    ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(psiElement.getLanguage());
+    return parserDefinition != null && parserDefinition.getCommentTokens().contains(psiElement.getNode().getElementType());
+  }
+
  protected static boolean isInjectedLanguageFragment(@Nullable PsiElement element) {
    return element instanceof PsiLanguageInjectionHost
           && InjectedLanguageUtil.hasInjections((PsiLanguageInjectionHost)element);
--- a/spellchecker/xml/src/XmlSpellcheckingStrategy.java
+++ b/spellchecker/xml/src/XmlSpellcheckingStrategy.java
@@ -32,7 +32,7 @@ import static java.util.Collections.singletonList;
 public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy implements DumbAware {

  private final Tokenizer<? extends PsiElement> myXmlTextTokenizer = createTextTokenizer();
-
+  private final Tokenizer<? extends PsiElement> myXmlCommentTokenizer = createCommentTokenizer();
  private final Tokenizer<? extends PsiElement> myXmlAttributeTokenizer = createAttributeValueTokenizer();

  @Override
@@ -40,13 +40,14 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
    if (element instanceof XmlText) {
      return myXmlTextTokenizer;
    }
+    if (isComment(element)) {
+      return myXmlCommentTokenizer;
+    }
    if (element instanceof XmlToken
        && ((XmlToken)element).getTokenType() == XmlTokenType.XML_DATA_CHARACTERS
        && !isXmlDataCharactersParentHandledByItsStrategy(element.getParent())) {
      // Special case for all other XML_DATA_CHARACTERS, which are not handled through parent PSI
-      if (isInTemplateLanguageFile(element))
-        return EMPTY_TOKENIZER;
-      return TEXT_TOKENIZER;
+      return isInTemplateLanguageFile(element) ? EMPTY_TOKENIZER : TEXT_TOKENIZER;
    }
    if (element instanceof XmlAttributeValue) {
      return myXmlAttributeTokenizer;
@@ -62,7 +63,6 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
        return true;
      }
    }
-
    return false;
  }

@@ -94,10 +94,24 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
    return file == null || file.getLanguage() instanceof TemplateLanguage;
  }

+  @Override
+  protected boolean isLiteral(@NotNull PsiElement element) {
+    return element instanceof XmlAttributeValue || element instanceof XmlText;
+  }
+
+  @Override
+  protected boolean isComment(@NotNull PsiElement element) {
+    return element instanceof XmlComment;
+  }
+
  protected Tokenizer<? extends PsiElement> createTextTokenizer() {
    return new XmlTextTokenizer(PlainTextSplitter.getInstance());
  }

+  protected Tokenizer<? extends PsiElement> createCommentTokenizer() {
+    return new XmlCommentTokenizer(PlainTextSplitter.getInstance());
+  }
+
  protected abstract static class XmlTextContentTokenizer<T extends XmlElement> extends XmlTokenizerBase<T> {

    public XmlTextContentTokenizer(Splitter splitter) {
@@ -125,7 +139,30 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
    }
  }

+  private static class XmlCommentTokenizer extends XmlTokenizerBase<XmlComment> {
+
+    private XmlCommentTokenizer(Splitter splitter) {
+      super(splitter);
+    }
+    // Yields one spellcheck range per XML_COMMENT_CHARACTERS child token of the comment; any other child is ignored.
+    @Override
+    protected @NotNull List<@NotNull SpellcheckRange> getSpellcheckRanges(@NotNull XmlComment comment) {
+      var ranges = new SmartList<SpellcheckRange>();
+      comment.acceptChildren(new XmlElementVisitor() {
+        @Override
+        public void visitXmlToken(@NotNull XmlToken token) {
+          if (token.getNode().getElementType() == XmlTokenType.XML_COMMENT_CHARACTERS) {
+            var text = token.getText();
+            ranges.add(new SpellcheckRange(text, false, token.getStartOffsetInParent(), TextRange.allOf(text)));
+          }
+        }
+      });
+      return ranges;
+    }
+  }
+
  protected static class XmlTextTokenizer extends XmlTextContentTokenizer<XmlText> {
+
    public XmlTextTokenizer(Splitter splitter) {
      super(splitter);
    }