mirror of
https://gitflic.ru/project/openide/openide.git
synced 2025-12-14 09:12:22 +07:00
master-inspectopedia-collector
Cleaner checks for br element. Merge branch 'master' into master-inspectopedia-collector. Collecting CWE ids. Merge-request: IJ-MR-126396. Merged-by: Egor Malyshev <egor.malyshev@jetbrains.com>. GitOrigin-RevId: 6bec313f9bf5a255bd81e922ed2e0b34fc6e4304
This commit is contained in:
committed by
intellij-monorepo-bot
parent
0b5363a00e
commit
63d7aa8192
@@ -8,8 +8,7 @@ import com.fasterxml.jackson.databind.SerializationFeature;
|
||||
import com.fasterxml.jackson.databind.json.JsonMapper;
|
||||
import com.intellij.codeInspection.InspectionEP;
|
||||
import com.intellij.codeInspection.InspectionProfileEntry;
|
||||
import com.intellij.codeInspection.ex.InspectionToolWrapper;
|
||||
import com.intellij.codeInspection.ex.ScopeToolState;
|
||||
import com.intellij.codeInspection.ex.*;
|
||||
import com.intellij.codeInspection.options.*;
|
||||
import com.intellij.ide.plugins.PluginManager;
|
||||
import com.intellij.inspectopedia.extractor.data.Inspection;
|
||||
@@ -18,6 +17,7 @@ import com.intellij.inspectopedia.extractor.data.Plugin;
|
||||
import com.intellij.inspectopedia.extractor.data.Plugins;
|
||||
import com.intellij.inspectopedia.extractor.utils.HtmlUtils;
|
||||
import com.intellij.openapi.application.ApplicationInfo;
|
||||
import com.intellij.openapi.application.ApplicationManager;
|
||||
import com.intellij.openapi.application.ApplicationStarter;
|
||||
import com.intellij.openapi.diagnostic.Logger;
|
||||
import com.intellij.openapi.project.Project;
|
||||
@@ -97,6 +97,11 @@ final class InspectopediaExtractor implements ApplicationStarter {
|
||||
|
||||
availablePlugins.put(IDE_NAME, new Plugin(IDE_NAME, IDE_NAME, IDE_VERSION));
|
||||
|
||||
final InspectionMetaInformationService
|
||||
service = ApplicationManager.getApplication().getService(InspectionMetaInformationService.class);
|
||||
|
||||
final MetaInformationState inspectionsExtraState = service == null ? null : (MetaInformationState)service.getState(null);
|
||||
|
||||
for (final ScopeToolState scopeToolState : scopeToolStates) {
|
||||
|
||||
final InspectionToolWrapper<?, ?> wrapper = scopeToolState.getTool();
|
||||
@@ -118,13 +123,16 @@ final class InspectopediaExtractor implements ApplicationStarter {
|
||||
catch (Throwable t) {
|
||||
LOG.info("Cannot create options panel " + wrapper.getShortName(), t);
|
||||
}
|
||||
final MetaInformation metaInformation = inspectionsExtraState == null ? null : inspectionsExtraState.getInspections().get(wrapper.getID());
|
||||
final List<Integer> cweIds = metaInformation == null ? null : metaInformation.getCweIds();
|
||||
|
||||
final String language = wrapper.getLanguage();
|
||||
final String briefDescription = HtmlUtils.cleanupHtml(description[0], language);
|
||||
final String extendedDescription = description.length > 1 ? HtmlUtils.cleanupHtml(description[1], language) : null;
|
||||
final Inspection inspection = new Inspection(wrapper.getShortName(), wrapper.getDisplayName(), wrapper.getDefaultLevel().getName(),
|
||||
language, briefDescription,
|
||||
extendedDescription, Arrays.asList(wrapper.getGroupPath()), wrapper.applyToDialects(),
|
||||
wrapper.isCleanupTool(), wrapper.isEnabledByDefault(), panelInfo);
|
||||
wrapper.isCleanupTool(), wrapper.isEnabledByDefault(), panelInfo, cweIds);
|
||||
|
||||
availablePlugins.get(pluginId).addInspection(inspection);
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ public class Inspection implements Comparable<Inspection> {
|
||||
public String extendedDescription = "";
|
||||
public boolean hasOptionsPanel = false;
|
||||
public List<OptionsPanelInfo> options = null;
|
||||
public List<Integer> cweIds = null;
|
||||
|
||||
public Inspection(String id,
|
||||
String name,
|
||||
@@ -38,7 +39,8 @@ public class Inspection implements Comparable<Inspection> {
|
||||
boolean appliesToDialects,
|
||||
boolean partOfCodeCleanup,
|
||||
boolean enabledByDefault,
|
||||
List<OptionsPanelInfo> options) {
|
||||
List<OptionsPanelInfo> options,
|
||||
List<Integer> cweIds) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
this.severity = severity;
|
||||
@@ -51,6 +53,7 @@ public class Inspection implements Comparable<Inspection> {
|
||||
this.isEnabledDefault = enabledByDefault;
|
||||
this.hasOptionsPanel = options != null;
|
||||
this.options = options;
|
||||
this.cweIds = cweIds;
|
||||
}
|
||||
|
||||
public Inspection() {
|
||||
|
||||
@@ -17,130 +17,129 @@ import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
public final class HtmlUtils {
|
||||
public static final Safelist SAFELIST = new Safelist();
|
||||
public static final Safelist SAFELIST = new Safelist();
|
||||
|
||||
static {
|
||||
SAFELIST.addTags("a", "b", "code", "i", "li", "list", "p", "s", "u");
|
||||
static {
|
||||
SAFELIST.addTags("a", "b", "code", "i", "li", "list", "p", "s", "u");
|
||||
}
|
||||
|
||||
// Tag renames applied by cleanupHtml: pair.first is passed to document.select(...) as a selector,
// pair.second is the tag name given to every matched element.
private static final List<Pair<String, String>> RENAME_MAP = List.of(
|
||||
Pair.create("ul", "list"),
|
||||
Pair.create("th", "td"),
|
||||
Pair.create("c", "code"),
|
||||
Pair.create("strong", "b"),
|
||||
Pair.create("small", "font"),
|
||||
Pair.create("span", "control"),
|
||||
Pair.create("blockquote", "tip"),
|
||||
Pair.create("em", "i")
|
||||
);
|
||||
|
||||
// Selectors whose matches cleanupHtml removes from the document (via Elements.remove()).
// The last entry presumably targets <code> elements containing only whitespace -- confirm
// against the jsoup ":matches(regex)" selector semantics.
private static final List<String> REMOVE_MAP = List.of(
|
||||
"hr",
|
||||
"br",
|
||||
"code:matches(^\\s*$)"
|
||||
);
|
||||
|
||||
// Selectors whose matches cleanupHtml unwraps (via Elements.unwrap()).
private static final List<String> UNWRAP_MAP = List.of(
|
||||
"tbody",
|
||||
"pre",
|
||||
"code[style=block] > *"
|
||||
);
|
||||
|
||||
/**
 * Parses {@code source} with Jsoup, rewrites the resulting DOM in place and returns the
 * serialized contents of the document body.
 * <p>
 * Steps, in order: rename tags per {@code RENAME_MAP}; turn {@code ol} into {@code list} with
 * {@code style="decimal"}; unwrap everything matched by {@code UNWRAP_MAP}; tag {@code pre > code}
 * elements with {@code style="block"} and a {@code lang} attribute; unwrap every direct child of
 * {@code code} except {@code a}; regroup the children of each element that contains a {@code br}
 * into {@code p} paragraphs; remove everything matched by {@code REMOVE_MAP}; unwrap {@code p}
 * elements that contain other {@code p} elements; remove {@code p} elements matched by
 * {@code p:matches(^\s*$)}.
 *
 * @param source                the HTML text to clean up
 * @param languageForCodeBlocks value written to the {@code lang} attribute of block code elements;
 *                              {@code "Text"} is used when this is {@code null}
 * @return the body HTML, written with XML output syntax and pretty-printing disabled
 */
@NotNull
|
||||
public static String cleanupHtml(@NotNull String source, @Nullable String languageForCodeBlocks) {
|
||||
final Document document = Jsoup.parse(source);
|
||||
|
||||
// Rename every element matched by the first item of each pair to the tag in the second item.
RENAME_MAP.forEach(map -> document.select(map.first).tagName(map.second));
|
||||
|
||||
// Ordered lists become <list style="decimal">.
final Elements ol = document.select("ol");
|
||||
ol.tagName("list");
|
||||
ol.attr("style", "decimal");
|
||||
|
||||
UNWRAP_MAP.forEach(map -> document.select(map).unwrap());
|
||||
|
||||
// NOTE(review): "pre" is listed in UNWRAP_MAP and has just been unwrapped above, so this
// "pre > code" selection may no longer match anything -- confirm the intended order. The same
// applies to the "code[style=block] > *" entry, which runs before style="block" is assigned here.
final Elements codeBlock = document.select("pre > code");
|
||||
codeBlock.attr("style", "block");
|
||||
codeBlock.attr("lang", languageForCodeBlocks == null ? "Text" : languageForCodeBlocks);
|
||||
|
||||
// Unwrap every element that is a direct child of <code>, except links.
document.select("code > *").stream()
|
||||
.filter(element -> !element.tagName().equals("a"))
|
||||
.forEach(Node::unwrap);
|
||||
|
||||
// For each distinct parent of a <br>: split its children into groups and rebuild the parent.
document.select("br").stream().map(Element::parent)
|
||||
.distinct()
|
||||
.forEach(parent -> {
|
||||
// Each group is (shouldWrap, nodes): true = run of non-block nodes to be wrapped in a new <p>,
// false = a single block element to be re-attached to the parent as is.
final List<Pair<Boolean, List<Node>>> groups = new ArrayList<>();
|
||||
final List<Node> inlineElements = new ArrayList<>();
|
||||
final Iterator<Node> childNodes = parent.childNodes().iterator();
|
||||
while (childNodes.hasNext()) {
|
||||
final Node childNode = childNodes.next();
|
||||
|
||||
// Non-block nodes accumulate into the current run.
if (!isBlockElement(childNode)) {
|
||||
inlineElements.add(childNode);
|
||||
}
|
||||
|
||||
// A block element (isBlockElement also reports <br>) or the last child closes the current run.
if (isBlockElement(childNode) || !childNodes.hasNext()) {
|
||||
if (!inlineElements.isEmpty()) {
|
||||
groups.add(Pair.create(true, List.copyOf(inlineElements)));
|
||||
inlineElements.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// Block elements other than <br> form their own group; <br> nodes are not put into any group.
if (isBlockElement(childNode) && !isBr(childNode)) {
|
||||
groups.add(Pair.create(false, List.of(childNode)));
|
||||
}
|
||||
}
|
||||
// Detach the grouped nodes and append them again in group order, wrapping runs in a new <p>.
for (Pair<Boolean, List<Node>> group : groups) {
|
||||
final Boolean shouldWrap = group.getFirst();
|
||||
final List<Node> nodes = group.getSecond();
|
||||
|
||||
final Element elementForNodes = shouldWrap ? document.createElement("p") : parent;
|
||||
|
||||
nodes.forEach(n -> {
|
||||
n.remove();
|
||||
elementForNodes.appendChild(n);
|
||||
});
|
||||
|
||||
if (shouldWrap) {
|
||||
parent.appendChild(elementForNodes);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Drops the REMOVE_MAP matches, which include the <br> elements left in place by the regrouping above.
REMOVE_MAP.forEach(map -> document.select(map).remove());
|
||||
|
||||
Elements paragraphsWithParagraphs;
|
||||
//What if there are hypothetically many nested P, and we're going to miss them with only one iteration?
|
||||
// Repeat until a pass finds no <p> that still contains another <p>.
do {
|
||||
paragraphsWithParagraphs = document.select("p:has(p)");
|
||||
paragraphsWithParagraphs.unwrap();
|
||||
}
|
||||
while (!paragraphsWithParagraphs.isEmpty());
|
||||
//And then there were multi nested paragraphs which deep down contained nothing but whitespace? Now they're ready for removal as well :)
|
||||
final Elements emptyParagraphs = document.select("p:matches(^\\s*$)");
|
||||
emptyParagraphs.remove();
|
||||
|
||||
// NOTE(review): the result of clean(...) is not used. Confirm whether Cleaner.clean mutates the
// passed document or returns a new cleaned one; in the latter case SAFELIST has no effect on
// the HTML returned below.
final Cleaner cleaner = new Cleaner(SAFELIST);
|
||||
cleaner.clean(document);
|
||||
|
||||
document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
||||
document.outputSettings().prettyPrint(false);
|
||||
|
||||
return document.body().html();
|
||||
}
|
||||
|
||||
private static boolean isBlockElement(@NotNull Node node) {
|
||||
if (!(node instanceof Element element)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
private static final List<Pair<String, String>> RENAME_MAP = List.of(
|
||||
Pair.create("ul", "list"),
|
||||
Pair.create("th", "td"),
|
||||
Pair.create("c", "code"),
|
||||
Pair.create("strong", "b"),
|
||||
Pair.create("small", "font"),
|
||||
Pair.create("span", "control"),
|
||||
Pair.create("blockquote", "tip"),
|
||||
Pair.create("em", "i")
|
||||
);
|
||||
return "list".equals(element.tagName()) ||
|
||||
("code".equals(element.tagName()) && "block".equals(element.attr("style"))) ||
|
||||
isBr(node);
|
||||
}
|
||||
|
||||
private static final List<String> REMOVE_MAP = List.of(
|
||||
"hr",
|
||||
"br",
|
||||
"code:matches(^\\s*$)"
|
||||
);
|
||||
|
||||
private static final List<String> UNWRAP_MAP = List.of(
|
||||
"tbody",
|
||||
"pre",
|
||||
"code[style=block] > *"
|
||||
);
|
||||
|
||||
@NotNull
|
||||
public static String cleanupHtml(@NotNull String source, @Nullable String languageForCodeBlocks) {
|
||||
final Document document = Jsoup.parse(source);
|
||||
|
||||
RENAME_MAP.forEach(map -> document.select(map.first).tagName(map.second));
|
||||
|
||||
final Elements ol = document.select("ol");
|
||||
ol.tagName("list");
|
||||
ol.attr("style", "decimal");
|
||||
|
||||
UNWRAP_MAP.forEach(map -> document.select(map).unwrap());
|
||||
|
||||
final Elements codeBlock = document.select("pre > code");
|
||||
codeBlock.attr("style", "block");
|
||||
codeBlock.attr("lang", languageForCodeBlocks == null ? "Text" : languageForCodeBlocks);
|
||||
|
||||
document.select("code > *").stream()
|
||||
.filter(element -> !element.tagName().equals("a"))
|
||||
.forEach(Node::unwrap);
|
||||
|
||||
document.select("br").stream().map(Element::parent)
|
||||
.distinct()
|
||||
.forEach(parent -> {
|
||||
final List<Pair<Boolean, List<Node>>> groups = new ArrayList<>();
|
||||
final List<Node> inlineElements = new ArrayList<>();
|
||||
final Iterator<Node> childNodes = parent.childNodes().iterator();
|
||||
while (childNodes.hasNext()) {
|
||||
final Node childNode = childNodes.next();
|
||||
|
||||
if (!isBlockElement(childNode)) {
|
||||
inlineElements.add(childNode);
|
||||
}
|
||||
|
||||
if (isBlockElement(childNode) || !childNodes.hasNext()) {
|
||||
if (!inlineElements.isEmpty()) {
|
||||
groups.add(Pair.create(true, List.copyOf(inlineElements)));
|
||||
inlineElements.clear();
|
||||
}
|
||||
}
|
||||
|
||||
if (isBlockElement(childNode) && !isBr(childNode)) {
|
||||
groups.add(Pair.create(false, List.of(childNode)));
|
||||
}
|
||||
}
|
||||
for (Pair<Boolean, List<Node>> group : groups) {
|
||||
final Boolean shouldWrap = group.getFirst();
|
||||
final List<Node> nodes = group.getSecond();
|
||||
|
||||
final Element elementForNodes = shouldWrap ? document.createElement("p") : parent;
|
||||
|
||||
nodes.forEach(n -> {
|
||||
n.remove();
|
||||
elementForNodes.appendChild(n);
|
||||
});
|
||||
|
||||
if (shouldWrap) {
|
||||
parent.appendChild(elementForNodes);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
REMOVE_MAP.forEach(map -> document.select(map).remove());
|
||||
|
||||
Elements paragraphsWithParagraphs;
|
||||
//What if there are hypothetically many nested P, and we're going to miss them with only one iteration?
|
||||
do {
|
||||
paragraphsWithParagraphs = document.select("p:has(p)");
|
||||
paragraphsWithParagraphs.unwrap();
|
||||
} while (!paragraphsWithParagraphs.isEmpty());
|
||||
//And then there were multi nested paragraphs which deep down contained nothing but whitespace? Now they're ready for removal as well :)
|
||||
final Elements emptyParagraphs = document.select("p:matches(^\\s*$)");
|
||||
emptyParagraphs.remove();
|
||||
|
||||
final Cleaner cleaner = new Cleaner(SAFELIST);
|
||||
cleaner.clean(document);
|
||||
|
||||
document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
||||
document.outputSettings().prettyPrint(false);
|
||||
|
||||
return document.body().html();
|
||||
}
|
||||
|
||||
private static boolean isBlockElement(@NotNull Node node) {
|
||||
if (!(node instanceof Element element))
|
||||
return false;
|
||||
|
||||
return element.tagName().equals("list") ||
|
||||
(element.tagName().equals("code") && element.attr("style").equals("block")) ||
|
||||
isBr(node);
|
||||
}
|
||||
|
||||
private static boolean isBr(@NotNull Node node) {
|
||||
if (!(node instanceof Element))
|
||||
return false;
|
||||
|
||||
return "br".equals(((Element) node).tagName());
|
||||
}
|
||||
/**
 * Tells whether the given node is a {@code br} element.
 *
 * @param node the node to test
 * @return {@code true} when {@code node} is an {@link Element} whose tag name equals {@code "br"}
 */
private static boolean isBr(@NotNull Node node) {
|
||||
return node instanceof Element element && "br".equals(element.tagName());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user