mirror of
https://gitflic.ru/project/openide/openide.git
synced 2025-12-15 02:59:33 +07:00
grazie: understand common HTML markup and block tags in Javadoc, Properties, and CDATA (IJPL-148256, IJPL-149288)
GitOrigin-RevId: c901095fd88b0b3cde9c37a751014de363498f79
This commit is contained in:
committed by
intellij-monorepo-bot
parent
d2c3e31a19
commit
5cc805fce0
@@ -38,21 +38,22 @@ public class JavaTextExtractor extends TextExtractor {
|
||||
.removingIndents(" \t").removingLineSuffixes(" \t");
|
||||
|
||||
@Override
|
||||
public TextContent buildTextContent(@NotNull PsiElement root, @NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
public @NotNull List<TextContent> buildTextContents(@NotNull PsiElement root, @NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
if (allowedDomains.contains(DOCUMENTATION)) {
|
||||
if (root instanceof PsiDocComment) {
|
||||
return HtmlUtilsKt.removeHtml(javadocBuilder.excluding(e -> e instanceof PsiDocTagImpl).build(root, DOCUMENTATION));
|
||||
return HtmlUtilsKt.excludeHtml(javadocBuilder.excluding(e -> e instanceof PsiDocTagImpl).build(root, DOCUMENTATION));
|
||||
}
|
||||
if (root instanceof PsiDocTagImpl) {
|
||||
return HtmlUtilsKt.removeHtml(javadocBuilder.build(root, DOCUMENTATION));
|
||||
return HtmlUtilsKt.excludeHtml(javadocBuilder.build(root, DOCUMENTATION));
|
||||
}
|
||||
}
|
||||
|
||||
if (root instanceof PsiCommentImpl && allowedDomains.contains(COMMENTS)) {
|
||||
List<PsiElement> roots = PsiUtilsKt.getNotSoDistantSimilarSiblings(root, e ->
|
||||
JAVA_PLAIN_COMMENT_BIT_SET.contains(PsiUtilCore.getElementType(e)));
|
||||
return TextContent.joinWithWhitespace('\n', ContainerUtil.mapNotNull(roots, c ->
|
||||
TextContentBuilder.FromPsi.removingIndents(" \t*/").removingLineSuffixes(" \t").build(c, COMMENTS)));
|
||||
return ContainerUtil.createMaybeSingletonList(
|
||||
TextContent.joinWithWhitespace('\n', ContainerUtil.mapNotNull(roots, c ->
|
||||
TextContentBuilder.FromPsi.removingIndents(" \t*/").removingLineSuffixes(" \t").build(c, COMMENTS))));
|
||||
}
|
||||
|
||||
if (root instanceof PsiLiteralExpression &&
|
||||
@@ -66,13 +67,13 @@ public class JavaTextExtractor extends TextExtractor {
|
||||
ContainerUtil.map(Text.allOccurrences(Pattern.compile("(?<=\n)" + "\\s{" + indent + "}"), content), Exclusion::exclude));
|
||||
}
|
||||
content = content.excludeRanges(ContainerUtil.map(Text.allOccurrences(Pattern.compile("\\\\\n"), content), Exclusion::exclude));
|
||||
return content.trimWhitespace();
|
||||
return ContainerUtil.createMaybeSingletonList(content.trimWhitespace());
|
||||
}
|
||||
|
||||
return content;
|
||||
return ContainerUtil.createMaybeSingletonList(content);
|
||||
}
|
||||
|
||||
return null;
|
||||
return List.of();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@ import com.intellij.psi.PsiElement;
|
||||
import com.intellij.psi.util.PsiUtilCore;
|
||||
import com.intellij.util.containers.ContainerUtil;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
@@ -28,13 +27,13 @@ public class PropertyTextExtractor extends TextExtractor {
|
||||
private static final Pattern trailingSlash = Pattern.compile("\\\\\n");
|
||||
|
||||
@Override
|
||||
public @Nullable TextContent buildTextContent(@NotNull PsiElement root,
|
||||
@NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
protected @NotNull List<TextContent> buildTextContents(@NotNull PsiElement root, @NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
if (root instanceof PsiComment) {
|
||||
List<PsiElement> roots = PsiUtilsKt.getNotSoDistantSimilarSiblings(root, e ->
|
||||
PropertiesTokenTypes.COMMENTS.contains(PsiUtilCore.getElementType(e)));
|
||||
return TextContent.joinWithWhitespace('\n', ContainerUtil.mapNotNull(roots, c ->
|
||||
TextContentBuilder.FromPsi.removingIndents(" \t#!").build(c, COMMENTS)));
|
||||
return ContainerUtil.createMaybeSingletonList(
|
||||
TextContent.joinWithWhitespace('\n', ContainerUtil.mapNotNull(roots, c ->
|
||||
TextContentBuilder.FromPsi.removingIndents(" \t#!").build(c, COMMENTS))));
|
||||
}
|
||||
if (PsiUtilCore.getElementType(root) == PropertiesTokenTypes.VALUE_CHARACTERS) {
|
||||
TextContent content = TextContent.builder().build(root, TextContent.TextDomain.PLAIN_TEXT);
|
||||
@@ -61,8 +60,8 @@ public class PropertyTextExtractor extends TextExtractor {
|
||||
}
|
||||
content = content.markUnknown(new TextRange(start, end));
|
||||
}
|
||||
return HtmlUtilsKt.removeHtml(content);
|
||||
return HtmlUtilsKt.excludeHtml(content);
|
||||
}
|
||||
return null;
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ package com.intellij.grazie.utils
|
||||
import ai.grazie.nlp.utils.takeNonWhitespaces
|
||||
import com.intellij.grazie.text.TextContent
|
||||
import com.intellij.grazie.text.TextContent.Exclusion
|
||||
import com.intellij.grazie.text.TextContent.ExclusionKind
|
||||
import com.intellij.openapi.progress.ProgressManager
|
||||
import com.intellij.openapi.util.TextRange
|
||||
import kotlinx.html.*
|
||||
@@ -32,9 +33,45 @@ var TD.valign: String
|
||||
|
||||
fun FlowContent.nbsp() = +Entities.nbsp
|
||||
|
||||
private val anyTag = Pattern.compile("</?\\w+[^>]*>")
|
||||
private val anyTag = Pattern.compile("</?(\\w+)[^>]*>")
|
||||
private val closingTag = Pattern.compile("</\\w+\\s*>")
|
||||
|
||||
@JvmField
|
||||
val commonBlockElements: Set<String> =
|
||||
setOf("body", "p", "br", "td", "li", "title", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "table", "ol", "ul")
|
||||
|
||||
private val commonMarkupElements = setOf("span", "i", "b", "u", "font", "a", "s", "strong", "sub", "sup")
|
||||
|
||||
/**
|
||||
* Remove HTML markup from a text, splitting it at block elements (like `<p>`),
|
||||
* marking common HTML markup tags (like `<i>`) as markup offsets,
|
||||
* and replacing all other tags with unknown fragments. Returns an empty list when [content] is null.
|
||||
*/
|
||||
fun excludeHtml(content: TextContent?): List<TextContent> {
|
||||
if (content == null) return emptyList()
|
||||
|
||||
val components = ArrayList<TextContent>()
|
||||
var lastComponentStart = 0
|
||||
var matchEnd = 0
|
||||
val matcher = anyTag.matcher(content)
|
||||
while (matcher.find(matchEnd)) { // resume the search right after the previously matched tag
|
||||
matchEnd = matcher.end()
|
||||
ProgressManager.checkCanceled()
|
||||
|
||||
val tagName = matcher.group(1)
|
||||
if (tagName in commonBlockElements) { // anyTag accepts an optional '/', so opening and closing block tags both split the text
|
||||
content.subText(TextRange(lastComponentStart, matcher.start()))?.let(components::add) // text before this block tag; nothing is added when subText yields null
|
||||
lastComponentStart = matcher.end()
|
||||
}
|
||||
}
|
||||
content.subText(TextRange(lastComponentStart, content.length))?.let(components::add) // remaining text after the last block tag
|
||||
|
||||
@Suppress("DEPRECATION")
|
||||
return components.mapNotNull { removeHtml(it)?.trimWhitespace() } // tags left inside each piece are excluded by removeHtml; null results are dropped
|
||||
}
|
||||
|
||||
/** Remove HTML markup from a text, replacing it with unknown or markup (for some common HTML tags) offsets. */
|
||||
@Deprecated("use excludeHtml", ReplaceWith("excludeHtml"))
|
||||
fun removeHtml(_content: TextContent?): TextContent? {
|
||||
var content: TextContent = _content ?: return null
|
||||
|
||||
@@ -52,6 +89,8 @@ fun removeHtml(_content: TextContent?): TextContent? {
|
||||
else null
|
||||
|
||||
fun tagClosed(tagName: String) {
|
||||
if (tagName in commonMarkupElements) return
|
||||
|
||||
val openingIndex = exclusions.indexOfLast { openingTagName(it.start, it.end) == tagName && content[it.end - 2] != '/' }
|
||||
if (openingIndex >= 0) {
|
||||
exclusions[openingIndex] = Exclusion.markUnknown(TextRange(exclusions[openingIndex].start, exclusions.last().end))
|
||||
@@ -59,13 +98,21 @@ fun removeHtml(_content: TextContent?): TextContent? {
|
||||
}
|
||||
}
|
||||
|
||||
for (tagRange in Text.allOccurrences(anyTag, content)) {
|
||||
var matchEnd = 0
|
||||
val matcher = anyTag.matcher(content)
|
||||
while (matcher.find(matchEnd)) {
|
||||
matchEnd = matcher.end()
|
||||
ProgressManager.checkCanceled()
|
||||
if (closingTag.matcher(content.subSequence(tagRange.startOffset, tagRange.endOffset)).matches()) {
|
||||
exclusions.add(Exclusion.markUnknown(tagRange))
|
||||
tagClosed(content.substring(tagRange.startOffset + 2, tagRange.endOffset - 1).trim())
|
||||
} else if (openingTagName(tagRange.startOffset, tagRange.endOffset) != null) {
|
||||
exclusions.add(Exclusion.markUnknown(tagRange))
|
||||
val matchStart = matcher.start()
|
||||
val tagName = matcher.group(1)
|
||||
if (!tagName[0].isLetterOrDigit()) continue
|
||||
|
||||
val exclusionKind = if (tagName in commonMarkupElements) ExclusionKind.markup else ExclusionKind.unknown
|
||||
if (closingTag.matcher(content.subSequence(matchStart, matchEnd)).matches()) {
|
||||
exclusions.add(Exclusion(matchStart, matchEnd, exclusionKind))
|
||||
tagClosed(content.substring(matchStart + 2, matchEnd - 1).trim())
|
||||
} else {
|
||||
exclusions.add(Exclusion(matchStart, matchEnd, exclusionKind))
|
||||
}
|
||||
}
|
||||
return content.excludeRanges(exclusions)
|
||||
|
||||
@@ -20,6 +20,7 @@ import com.intellij.psi.util.PsiTreeUtil;
|
||||
import com.intellij.psi.xml.XmlTag;
|
||||
import com.intellij.testFramework.fixtures.BasePlatformTestCase;
|
||||
import com.intellij.tools.ide.metrics.benchmark.Benchmark;
|
||||
import com.intellij.util.containers.ContainerUtil;
|
||||
import kotlin.text.StringsKt;
|
||||
import one.util.streamex.IntStreamEx;
|
||||
import org.intellij.lang.regexp.RegExpLanguage;
|
||||
@@ -114,11 +115,12 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
}
|
||||
|
||||
public void testBrokenPropertyMessageFormat() {
|
||||
assertEquals("a |", unknownOffsets(extractText("a.properties", "a=a {0, choice, 1#1 code fragment|2#{0,number} code fragments", 4)));
|
||||
assertEquals("a|", unknownOffsets(extractText("a.properties", "a=a {0, choice, 1#1 code fragment|2#{0,number} code fragments", 4)));
|
||||
}
|
||||
|
||||
public void testExcludePropertyHtml() {
|
||||
assertEquals("Hello |World", unknownOffsets(extractText("a.properties", "a=<html>Hello <p/>World</html>", 8)));
|
||||
List<TextContent> texts = extractTexts("a.properties", "a=<html>Hello <p/><i>World</i></html>", 8, PsiElement.class);
|
||||
assertEquals(List.of("Hello", "World"), ContainerUtil.map(texts, TextContentTest::unknownOffsets));
|
||||
}
|
||||
|
||||
public void testMultiLineCommentInProperties() {
|
||||
@@ -142,10 +144,11 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
* @return the offset of {@link #bar} in something
|
||||
* @throws Exception when something happens
|
||||
*/""";
|
||||
TextContent text = extractText("a.java", docText, 6);
|
||||
assertEquals("Hello |,\nhere's an asterisk: *\nand some |.\ntags1 |\ntags2 |\n|is unknown.", unknownOffsets(text));
|
||||
assertEquals(
|
||||
List.of("Hello |,\nhere's an asterisk: *\nand some |.\ntags1 |\ntags2 |one| two", "three| four|\n|is unknown."),
|
||||
ContainerUtil.map(extractTexts("a.java", docText, 6, PsiDocComment.class), TextContentTest::unknownOffsets));
|
||||
|
||||
text = extractText("a.java", docText, docText.indexOf("the offset"));
|
||||
TextContent text = extractText("a.java", docText, docText.indexOf("the offset"));
|
||||
assertEquals("the offset of in something", text.toString());
|
||||
|
||||
text = extractText("a.java", docText, docText.indexOf("without"));
|
||||
@@ -274,8 +277,11 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
}
|
||||
assertEquals("|abc|", unknownOffsets(extractText("a.xml", "<b>abc</b>", 4)));
|
||||
|
||||
assertEquals("|characters with markup\nand without it|",
|
||||
unknownOffsets(extractText("a.xml", "<b><![CDATA[\n characters with markup\n]]>and without it</b>", 22)));
|
||||
{
|
||||
String text = "<b><![CDATA[\n characters with markup\n]]>and without it</b>";
|
||||
assertEquals("characters with markup", unknownOffsets(extractText("a.xml", text, 22)));
|
||||
assertEquals("and without it|", unknownOffsets(extractText("a.xml", text, 45)));
|
||||
}
|
||||
|
||||
assertEquals("abcd efg", unknownOffsets(extractText("a.xml", "<tag attr=\"abcd efg\"/>", 14)));
|
||||
assertEquals("comment", extractText("a.xml", "<!-- comment -->", 10).toString());
|
||||
@@ -339,7 +345,7 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
PsiDocComment comment = PsiTreeUtil.findElementOfClassAtOffset(file, 10, PsiDocComment.class, false);
|
||||
TextExtractor extractor = new JavaTextExtractor();
|
||||
Benchmark.newBenchmark("TextContent building with HTML removal", () -> {
|
||||
assertEquals(expected, extractor.buildTextContent(comment, TextContent.TextDomain.ALL).toString());
|
||||
assertEquals(expected, assertOneElement(extractor.buildTextContents(comment, TextContent.TextDomain.ALL)).toString());
|
||||
}).start();
|
||||
}
|
||||
|
||||
@@ -362,7 +368,7 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
var literal = PsiTreeUtil.findElementOfClassAtOffset(file, 100, PsiLiteralExpression.class, false);
|
||||
var extractor = new JavaTextExtractor();
|
||||
Benchmark.newBenchmark("TextContent building from a long text fragment", () -> {
|
||||
assertEquals(expected, extractor.buildTextContent(literal, TextContent.TextDomain.ALL).toString());
|
||||
assertEquals(expected, assertOneElement(extractor.buildTextContents(literal, TextContent.TextDomain.ALL)).toString());
|
||||
}).start();
|
||||
}
|
||||
|
||||
@@ -374,7 +380,7 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
PsiElement tag = PsiTreeUtil.findElementOfClassAtOffset(file, text.indexOf("something"), PsiDocTag.class, false);
|
||||
Benchmark.newBenchmark("TextContent building from complex PSI", () -> {
|
||||
for (int i = 0; i < 10; i++) {
|
||||
TextContent content = extractor.buildTextContent(tag, TextContent.TextDomain.ALL);
|
||||
TextContent content = assertOneElement(extractor.buildTextContents(tag, TextContent.TextDomain.ALL));
|
||||
assertEquals("something if is not too expensive", content.toString());
|
||||
}
|
||||
}).start();
|
||||
@@ -412,9 +418,17 @@ public class TextExtractionTest extends BasePlatformTestCase {
|
||||
return extractText(fileName, fileText, offset, getProject());
|
||||
}
|
||||
|
||||
public static TextContent extractText(String fileName, String fileText, int offset, Project project) {
|
||||
private List<TextContent> extractTexts(String fileName, String text, int offset, Class<? extends PsiElement> psi) { // test helper: creates a file from text and returns what findTextsAt yields for the element of class psi at offset
|
||||
PsiFile file = createFile(fileName, text, getProject());
|
||||
return TextExtractor.findTextsAt(PsiTreeUtil.findElementOfClassAtOffset(file, offset, psi, false), TextContent.TextDomain.ALL);
|
||||
}
|
||||
|
||||
private static PsiFile createFile(String fileName, String fileText, Project project) {
|
||||
FileType fileType = FileTypeManager.getInstance().getFileTypeByFileName(fileName);
|
||||
PsiFile file = PsiFileFactory.getInstance(project).createFileFromText(fileName, fileType, fileText);
|
||||
return TextExtractor.findTextAt(file, offset, TextContent.TextDomain.ALL);
|
||||
return PsiFileFactory.getInstance(project).createFileFromText(fileName, fileType, fileText);
|
||||
}
|
||||
|
||||
public static TextContent extractText(String fileName, String fileText, int offset, Project project) {
|
||||
return TextExtractor.findTextAt(createFile(fileName, fileText, project), offset, TextContent.TextDomain.ALL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ class ExampleClassWithNoTypos<T> {
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Creates an empty group.
|
||||
* Creates an empty group. It's a <b>react</b> method.
|
||||
*
|
||||
* @param name The name of the group. And another sentence.
|
||||
*/
|
||||
@@ -60,10 +60,15 @@ class ExampleClassWithNoTypos<T> {
|
||||
*/
|
||||
class ExampleClassWithTypos<T> {
|
||||
|
||||
/**
|
||||
* There can be many mistakes here. It <GRAMMAR_ERROR descr="IT_VBZ">add</GRAMMAR_ERROR><br>
|
||||
*
|
||||
* <b>It <GRAMMAR_ERROR descr="IT_VBZ">add</GRAMMAR_ERROR></b>
|
||||
*/
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Creates an empty group.
|
||||
* Creates an empty group. It's a <GRAMMAR_ERROR descr="A_GOOGLE">react</GRAMMAR_ERROR> method.
|
||||
*
|
||||
* @param name the <GRAMMAR_ERROR descr="COMMA_WHICH">name which</GRAMMAR_ERROR> group
|
||||
*/
|
||||
|
||||
@@ -13,4 +13,11 @@
|
||||
System.out.println("Hello " + name)
|
||||
Hello John
|
||||
</code>
|
||||
<description-multiple-paragraphs><![CDATA[
|
||||
There is a possibility of such thing.<p>So that this is possible. And this is <GRAMMAR_ERROR descr="EN_A_VS_AN">an</GRAMMAR_ERROR> mistake.
|
||||
]]></description-multiple-paragraphs>
|
||||
<description-single-sentence>
|
||||
There is a <GRAMMAR_ERROR descr="POSSIBILTY_POSSIBLE">possibility</GRAMMAR_ERROR> of such thing so that this is possible.
|
||||
</description-single-sentence
|
||||
>
|
||||
</shiporder>
|
||||
|
||||
@@ -29,7 +29,6 @@ import com.intellij.psi.util.PsiUtilCore;
|
||||
import com.intellij.psi.xml.*;
|
||||
import com.intellij.util.containers.ContainerUtil;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
@@ -49,35 +48,34 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected @Nullable TextContent buildTextContent(@NotNull PsiElement element,
|
||||
@NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
protected @NotNull List<TextContent> buildTextContents(@NotNull PsiElement element, @NotNull Set<TextContent.TextDomain> allowedDomains) {
|
||||
if (isText(element) && hasSuitableDialect(element)) {
|
||||
var classifier = tagClassifier(element);
|
||||
PsiElement container = SyntaxTraverser.psiApi().parents(element)
|
||||
.find(e -> e instanceof XmlDocument || e instanceof XmlTag && classifier.apply((XmlTag)e) != TagKind.Inline);
|
||||
if (container != null) {
|
||||
Map<PsiElement, TextContent> contentsInside = CachedValuesManager.getCachedValue(container, () ->
|
||||
Map<PsiElement, List<TextContent>> contentsInside = CachedValuesManager.getCachedValue(container, () ->
|
||||
CachedValueProvider.Result.create(calcContents(container), container));
|
||||
return contentsInside.get(element);
|
||||
return contentsInside.getOrDefault(element, List.of());
|
||||
}
|
||||
}
|
||||
|
||||
IElementType type = PsiUtilCore.getElementType(element);
|
||||
if (type == XmlTokenType.XML_COMMENT_CHARACTERS && allowedDomains.contains(COMMENTS) && hasSuitableDialect(element)) {
|
||||
return builder.build(element, COMMENTS);
|
||||
return ContainerUtil.createMaybeSingletonList(builder.build(element, COMMENTS));
|
||||
}
|
||||
|
||||
if (type == XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN && allowedDomains.contains(LITERALS) && hasSuitableDialect(element)) {
|
||||
TextContent content = builder.build(element, LITERALS);
|
||||
if (content != null && seemsNatural(content)) {
|
||||
return content;
|
||||
return List.of(content);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return List.of();
|
||||
}
|
||||
|
||||
private @NotNull Map<PsiElement, TextContent> calcContents(PsiElement container) {
|
||||
private @NotNull Map<PsiElement, List<TextContent>> calcContents(PsiElement container) {
|
||||
if (container instanceof XmlTag && isNonText((XmlTag)container)) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
@@ -88,7 +86,7 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
var fullContent = NotNullLazyValue.lazy(() -> TextContent.psiFragment(PLAIN_TEXT, container));
|
||||
|
||||
var visitor = new PsiRecursiveElementWalkingVisitor() {
|
||||
final Map<PsiElement, TextContent> result = new HashMap<>();
|
||||
final Map<PsiElement, List<TextContent>> result = new HashMap<>();
|
||||
final List<PsiElement> group = new ArrayList<>();
|
||||
final Set<Integer> markupIndices = new HashSet<>();
|
||||
final Set<Integer> unknownIndices = new HashSet<>();
|
||||
@@ -115,7 +113,17 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
}
|
||||
|
||||
if (isText(each)) {
|
||||
group.add(each);
|
||||
if (isCdata(each.getParent())) {
|
||||
List<TextContent> contents = HtmlUtilsKt.excludeHtml(
|
||||
extractRange(each.getTextRange().shiftLeft(container.getTextRange().getStartOffset())));
|
||||
if (!contents.isEmpty()) { // isolate CDATA into its own TextContent set for now; maybe glue to the surrounding texts later
|
||||
flushGroup(false);
|
||||
result.put(each, contents);
|
||||
unknownBefore = false;
|
||||
}
|
||||
} else {
|
||||
group.add(each);
|
||||
}
|
||||
}
|
||||
else if (PsiUtilCore.getElementType(each) == XmlTokenType.XML_CHAR_ENTITY_REF) {
|
||||
if (HtmlUtilsKt.isSpaceEntity(each.getText())) {
|
||||
@@ -127,6 +135,11 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
super.visitElement(each);
|
||||
}
|
||||
|
||||
private TextContent extractRange(TextRange range) { // keeps only the given range of the container's full text content
|
||||
TextContent full = fullContent.getValue(); // lazily built PLAIN_TEXT fragment of the whole container
|
||||
return full.excludeRange(new TextRange(range.getEndOffset(), full.length())).excludeRange(new TextRange(0, range.getStartOffset())); // tail is excluded before the head; presumably so the head offsets are still valid for the second call - TODO confirm
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void elementFinished(PsiElement element) {
|
||||
super.elementFinished(element);
|
||||
@@ -140,7 +153,7 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
List<TextContent> components = new ArrayList<>(group.size());
|
||||
for (int i = 0; i < group.size(); i++) {
|
||||
PsiElement e = group.get(i);
|
||||
TextContent component = extractRange(fullContent.getValue(), e.getTextRange().shiftLeft(containerStart));
|
||||
TextContent component = extractRange(e.getTextRange().shiftLeft(containerStart));
|
||||
component = applyExclusions(i, component, markupIndices, ExclusionKind.markup);
|
||||
component = applyExclusions(i, component, unknownIndices, ExclusionKind.unknown);
|
||||
components.add(component);
|
||||
@@ -152,7 +165,7 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
content = HtmlUtilsKt.inlineSpaceEntities(content.removeIndents(Set.of(' ', '\t')));
|
||||
if (content != null) {
|
||||
for (PsiElement e : group) {
|
||||
result.put(e, content);
|
||||
result.put(e, List.of(content));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -178,15 +191,9 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
return content.toString().contains(" ");
|
||||
}
|
||||
|
||||
private static TextContent extractRange(TextContent full, TextRange range) {
|
||||
return full.excludeRange(new TextRange(range.getEndOffset(), full.length())).excludeRange(new TextRange(0, range.getStartOffset()));
|
||||
}
|
||||
|
||||
private static boolean isText(PsiElement leaf) {
|
||||
PsiElement parent = leaf.getParent();
|
||||
if (!(parent instanceof XmlText) &&
|
||||
!(PsiUtilCore.getElementType(parent) == XmlElementType.XML_CDATA && parent.getParent() instanceof XmlText) &&
|
||||
!(parent instanceof XmlDocument)) {
|
||||
if (!(parent instanceof XmlText) && !isCdata(parent) && !(parent instanceof XmlDocument)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -195,6 +202,10 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
type == XmlTokenType.XML_DATA_CHARACTERS;
|
||||
}
|
||||
|
||||
private static boolean isCdata(PsiElement element) {
|
||||
return PsiUtilCore.getElementType(element) == XmlElementType.XML_CDATA;
|
||||
}
|
||||
|
||||
private boolean hasSuitableDialect(@NotNull PsiElement element) {
|
||||
return myEnabledDialects.contains(element.getContainingFile().getLanguage().getClass());
|
||||
}
|
||||
@@ -216,9 +227,6 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
super(HTMLLanguage.class);
|
||||
}
|
||||
|
||||
private static final Set<String> DEFINITELY_BLOCK_TAGS =
|
||||
Set.of("body", "p", "br", "td", "li", "title", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "table");
|
||||
|
||||
@Override
|
||||
protected Function<XmlTag, TagKind> tagClassifier(@NotNull PsiElement context) {
|
||||
if (!Registry.is("grazie.html.concatenate.inline.tag.contents")) {
|
||||
@@ -230,7 +238,7 @@ public class XmlTextExtractor extends TextExtractor {
|
||||
return tag -> {
|
||||
String name = tag.getName();
|
||||
if (NON_TEXT_TAGS.contains(name)) return TagKind.Unknown;
|
||||
if (DEFINITELY_BLOCK_TAGS.contains(name)) return TagKind.Block;
|
||||
if (HtmlUtilsKt.commonBlockElements.contains(name)) return TagKind.Block;
|
||||
if (inlineTags.contains(name)) return TagKind.Inline;
|
||||
return TagKind.Unknown;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user