IJPL-36149 Spell-checker treats HTML text as code

Co-authored-by: Peter Gromov <peter.gromov@jetbrains.com>

Merge-request: IJ-MR-164808
Merged-by: Ilia Permiashkin <ilia.permiashkin@jetbrains.com>
(cherry picked from commit 3515defd096aaf041b719fff47c7a18d539f93c4)

GitOrigin-RevId: c4ce65568e8d28873e10add0fb4f13b62db8e548
This commit is contained in:
Ilia Permiashkin
2025-06-17 14:30:50 +00:00
committed by intellij-monorepo-bot
parent 791ef44df3
commit 1625bc3b08
10 changed files with 117 additions and 22 deletions

View File

@@ -17,6 +17,14 @@ class XMLSupportTest : GrazieTestBase() {
runHighlightTestForFile("ide/language/xml/Example.xml")
}
// Runs the highlighting test against the SelfReferenceExample.xml fixture.
// NOTE(review): per the test name, this covers typo reporting when words also occur as tag names — confirm against the fixture.
fun `test typo checks and self references xml file`() {
runHighlightTestForFile("ide/language/xml/SelfReferenceExample.xml")
}
// Runs the highlighting test against the Comment.xml fixture.
// NOTE(review): per the test name, the fixture places comments before the root tag — confirm against the fixture.
fun `test typo checks when comments are before root tag xml file`() {
runHighlightTestForFile("ide/language/xml/Comment.xml")
}
// Runs the highlighting test against the Example.svg fixture.
// NOTE(review): per the test name, no grammar highlighting is expected in SVG files — confirm against the fixture.
fun `test no grammar checks in svg file`() {
runHighlightTestForFile("ide/language/xml/Example.svg")
}
@@ -26,4 +34,11 @@ class XMLSupportTest : GrazieTestBase() {
runHighlightTestForFile("ide/language/xml/Example.html")
}
// Runs the highlighting test against the SelfReferenceExample.html fixture (HTML counterpart of the XML self-reference test).
fun `test typo checks and self references html file`() {
runHighlightTestForFile("ide/language/xml/SelfReferenceExample.html")
}
// Runs the highlighting test against the Comment.html fixture (HTML counterpart of the XML comment test).
fun `test typo checks when comments are before root tag html file`() {
runHighlightTestForFile("ide/language/xml/Comment.html")
}
}

View File

@@ -0,0 +1,3 @@
<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->

View File

@@ -0,0 +1,4 @@
<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
<unterqueren>Not a typo</unterqueren>

View File

@@ -46,5 +46,9 @@ actually return values.
<p>Use the <b>Ignore single field static imports</b> checkbox to ignore single-field <code>import static</code> statements.</p>
<p>There are several reasons synchronization on <code>this</code> or <code>class</code> expressions may be a bad idea:</p>
<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
</body>
</html>

View File

@@ -20,4 +20,9 @@
There is a <GRAMMAR_ERROR descr="POSSIBILTY_POSSIBLE">possibility</GRAMMAR_ERROR> of such thing so that this is possible.
</description-single-sentence
>
<someTag withId="<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>"/>
<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->
</shiporder>

View File

@@ -0,0 +1,7 @@
<ueberweisen>Tags are code. Sehenswürdigkeit, açúcar, <TYPO descr="Typo: In word 'acucar'">acucar</TYPO> and <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>
<acucar/>
</ueberweisen>
<!-- <<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>>Comment 1, <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO></<TYPO descr="Typo: In word 'acucar'">acucar</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>>Comment 2</<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>> -->
<!-- <<TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>/> -->

View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<shipOrder xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="Example.xsd">
<ueberweisen>Tags are code. Sehenswürdigkeit, açúcar, <TYPO descr="Typo: In word 'acucar'">acucar</TYPO> and <TYPO descr="Typo: In word 'ueberweisen'">ueberweisen</TYPO>
<acucar/>
</ueberweisen>
<!-- <acucar>Comment 1, ueberweisen</acucar> -->
<!-- <ueberweisen>Comment 2</ueberweisen> -->
<!-- <ueberweisen/> -->
</shipOrder>

View File

@@ -11,7 +11,6 @@ import com.intellij.lang.refactoring.NamesValidator;
import com.intellij.openapi.progress.ProgressManager;
import com.intellij.openapi.project.DumbAware;
import com.intellij.openapi.project.DumbService;
import com.intellij.openapi.project.Project;
import com.intellij.openapi.util.TextRange;
import com.intellij.openapi.util.registry.Registry;
import com.intellij.profile.codeInspection.InspectionProfileManager;
@@ -20,19 +19,16 @@ import com.intellij.psi.PsiElementVisitor;
import com.intellij.psi.PsiFile;
import com.intellij.psi.PsiReference;
import com.intellij.psi.util.CachedValuesManager;
import com.intellij.refactoring.rename.RenameUtil;
import com.intellij.spellchecker.SpellCheckerManager;
import com.intellij.spellchecker.tokenizer.*;
import com.intellij.spellchecker.util.SpellCheckerBundle;
import com.intellij.util.Consumer;
import com.intellij.util.containers.CollectionFactory;
import com.intellij.util.io.IOUtil;
import com.intellij.util.text.StringSearcher;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.text.Normalizer;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@@ -279,15 +275,20 @@ public final class SpellCheckingInspection extends LocalInspectionTool implement
}
PsiFile file = myElement.getContainingFile();
Map<String, Boolean> referenceWords = CachedValuesManager.getProjectPsiDependentCache(file, (element) -> new ConcurrentHashMap<>());
return referenceWords.computeIfAbsent(word, (key) -> hasSameNamedReferenceInFile(file, key));
Map<String, Boolean> references = CachedValuesManager.getProjectPsiDependentCache(file, (psi) -> new ConcurrentHashMap<>());
return references.computeIfAbsent(word, key -> hasSameNamedReferencesInFile(file, key));
}
private boolean hasSameNamedReferenceInFile(PsiFile file, String word) {
for (int occurrence : new StringSearcher(word, true, true).findAllOccurrences(file.getText())) {
private static boolean hasSameNamedReferencesInFile(PsiFile file, String word) {
int[] occurrences = new StringSearcher(word, true, true).findAllOccurrences(file.getText());
if (occurrences.length <= 1) {
return false;
}
for (int occurrence : occurrences) {
PsiReference reference = file.findReferenceAt(occurrence);
PsiElement element = reference != null ? reference.resolve() : null;
if (reference != null && element != null && reference.getElement() != element) {
PsiElement resolvedReference = reference != null ? reference.resolve() : null;
if (reference != null && resolvedReference != null && reference.getElement() != resolvedReference) {
return true;
}
}

View File

@@ -13,7 +13,6 @@ import com.intellij.openapi.project.PossiblyDumbAware;
import com.intellij.openapi.util.TextRange;
import com.intellij.psi.*;
import com.intellij.psi.impl.source.tree.injected.InjectedLanguageUtil;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.util.PsiTreeUtil;
import com.intellij.spellchecker.DictionaryLayer;
import com.intellij.spellchecker.DictionaryLayersProvider;
@@ -94,18 +93,16 @@ public class SpellcheckingStrategy implements PossiblyDumbAware {
}
public boolean elementFitsScope(@NotNull PsiElement element, Set<SpellCheckingInspection.SpellCheckingScope> scope) {
final Language language = element.getLanguage();
final IElementType elementType = element.getNode().getElementType();
final ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language);
Language language = element.getLanguage();
ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(language);
if (parserDefinition != null) {
if (parserDefinition.getStringLiteralElements().contains(elementType)) {
if (isLiteral(element)) {
if (!scope.contains(SpellCheckingInspection.SpellCheckingScope.Literals)) {
return false;
}
}
else if (parserDefinition.getCommentTokens().contains(elementType)) {
else if (isComment(element)) {
if (!scope.contains(SpellCheckingInspection.SpellCheckingScope.Comments)) {
return false;
}
@@ -117,6 +114,16 @@ public class SpellcheckingStrategy implements PossiblyDumbAware {
return true;
}
/**
 * Tells whether {@code psiElement} is a string literal of its language.
 * <p>
 * The default implementation looks up the {@link ParserDefinition} registered for the element's
 * language and checks whether the element's node type is one of its string literal element types.
 * When no parser definition is registered for the language, the element is not considered a
 * literal and {@code false} is returned instead of failing with a {@link NullPointerException}.
 *
 * @param psiElement element to classify
 * @return {@code true} if the element's type is among the language's string literal element types
 */
protected boolean isLiteral(@NotNull PsiElement psiElement) {
  ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(psiElement.getLanguage());
  if (parserDefinition == null) {
    // No parser definition for this language: nothing can be classified as a literal.
    return false;
  }
  return parserDefinition.getStringLiteralElements().contains(psiElement.getNode().getElementType());
}
/**
 * Tells whether {@code psiElement} is a comment of its language.
 * <p>
 * The default implementation looks up the {@link ParserDefinition} registered for the element's
 * language and checks whether the element's node type is one of its comment tokens.
 * When no parser definition is registered for the language, the element is not considered a
 * comment and {@code false} is returned instead of failing with a {@link NullPointerException}.
 *
 * @param psiElement element to classify
 * @return {@code true} if the element's type is among the language's comment tokens
 */
protected boolean isComment(@NotNull PsiElement psiElement) {
  ParserDefinition parserDefinition = LanguageParserDefinitions.INSTANCE.forLanguage(psiElement.getLanguage());
  if (parserDefinition == null) {
    // No parser definition for this language: nothing can be classified as a comment.
    return false;
  }
  return parserDefinition.getCommentTokens().contains(psiElement.getNode().getElementType());
}
protected static boolean isInjectedLanguageFragment(@Nullable PsiElement element) {
return element instanceof PsiLanguageInjectionHost
&& InjectedLanguageUtil.hasInjections((PsiLanguageInjectionHost)element);

View File

@@ -32,7 +32,7 @@ import static java.util.Collections.singletonList;
public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy implements DumbAware {
private final Tokenizer<? extends PsiElement> myXmlTextTokenizer = createTextTokenizer();
private final Tokenizer<? extends PsiElement> myXmlCommentTokenizer = createCommentTokenizer();
private final Tokenizer<? extends PsiElement> myXmlAttributeTokenizer = createAttributeValueTokenizer();
@Override
@@ -40,13 +40,14 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
if (element instanceof XmlText) {
return myXmlTextTokenizer;
}
if (isComment(element)) {
return myXmlCommentTokenizer;
}
if (element instanceof XmlToken
&& ((XmlToken)element).getTokenType() == XmlTokenType.XML_DATA_CHARACTERS
&& !isXmlDataCharactersParentHandledByItsStrategy(element.getParent())) {
// Special case for all other XML_DATA_CHARACTERS, which are not handled through parent PSI
if (isInTemplateLanguageFile(element))
return EMPTY_TOKENIZER;
return TEXT_TOKENIZER;
return isInTemplateLanguageFile(element) ? EMPTY_TOKENIZER : TEXT_TOKENIZER;
}
if (element instanceof XmlAttributeValue) {
return myXmlAttributeTokenizer;
@@ -62,7 +63,6 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
return true;
}
}
return false;
}
@@ -94,10 +94,24 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
return file == null || file.getLanguage() instanceof TemplateLanguage;
}
/**
 * Reports XML text content and attribute values as literals.
 *
 * @param element element to classify
 * @return {@code true} for {@link XmlText} and {@link XmlAttributeValue} elements
 */
@Override
protected boolean isLiteral(@NotNull PsiElement element) {
  return element instanceof XmlText || element instanceof XmlAttributeValue;
}
/**
 * Reports only {@link XmlComment} elements as comments.
 *
 * @param element element to classify
 * @return {@code true} if {@code element} is an {@link XmlComment}
 */
@Override
protected boolean isComment(@NotNull PsiElement element) {
return element instanceof XmlComment;
}
/**
 * Creates the tokenizer for XML text, backed by the splitter obtained from
 * {@link PlainTextSplitter#getInstance()}.
 *
 * @return a new {@link XmlTextTokenizer}
 */
protected Tokenizer<? extends PsiElement> createTextTokenizer() {
  Splitter plainTextSplitter = PlainTextSplitter.getInstance();
  return new XmlTextTokenizer(plainTextSplitter);
}
/**
 * Creates the tokenizer for XML comments, backed by the splitter obtained from
 * {@link PlainTextSplitter#getInstance()}.
 *
 * @return a new {@link XmlCommentTokenizer}
 */
protected Tokenizer<? extends PsiElement> createCommentTokenizer() {
  Splitter plainTextSplitter = PlainTextSplitter.getInstance();
  return new XmlCommentTokenizer(plainTextSplitter);
}
protected abstract static class XmlTextContentTokenizer<T extends XmlElement> extends XmlTokenizerBase<T> {
public XmlTextContentTokenizer(Splitter splitter) {
@@ -125,7 +139,30 @@ public class XmlSpellcheckingStrategy extends SuppressibleSpellcheckingStrategy
}
}
/**
 * Tokenizer for {@link XmlComment} elements. Each child token of type
 * {@link XmlTokenType#XML_COMMENT_CHARACTERS} visited in the comment yields one spellcheck range.
 */
private static class XmlCommentTokenizer extends XmlTokenizerBase<XmlComment> {
  private XmlCommentTokenizer(Splitter splitter) {
    super(splitter);
  }

  /**
   * Collects the spellcheck ranges of a comment.
   *
   * @param comment comment whose children are visited
   * @return one range per {@code XML_COMMENT_CHARACTERS} token, in visiting order; empty if there are none
   */
  @Override
  protected @NotNull List<@NotNull SpellcheckRange> getSpellcheckRanges(@NotNull XmlComment comment) {
    List<SpellcheckRange> collected = new SmartList<>();
    XmlElementVisitor commentCharactersCollector = new XmlElementVisitor() {
      @Override
      public void visitXmlToken(@NotNull XmlToken token) {
        // Only comment character tokens contribute a range; every other token is skipped.
        if (token.getNode().getElementType() != XmlTokenType.XML_COMMENT_CHARACTERS) {
          return;
        }
        String tokenText = token.getText();
        int offsetInComment = token.getStartOffsetInParent();
        // The range spans the whole token text.
        collected.add(new SpellcheckRange(tokenText, false, offsetInComment, TextRange.allOf(tokenText)));
      }
    };
    comment.acceptChildren(commentCharactersCollector);
    return collected;
  }
}
protected static class XmlTextTokenizer extends XmlTextContentTokenizer<XmlText> {
public XmlTextTokenizer(Splitter splitter) {
super(splitter);
}