mirror of
https://gitflic.ru/project/openide/openide.git
synced 2025-12-14 18:05:27 +07:00
master-inspectopedia-collector
Cleaner checks for br element Merge branch 'master' into master-inspectopedia-collector Collecting CWE ids Merge-request: IJ-MR-126396 Merged-by: Egor Malyshev <egor.malyshev@jetbrains.com> GitOrigin-RevId: 6bec313f9bf5a255bd81e922ed2e0b34fc6e4304
This commit is contained in:
committed by
intellij-monorepo-bot
parent
0b5363a00e
commit
63d7aa8192
@@ -8,8 +8,7 @@ import com.fasterxml.jackson.databind.SerializationFeature;
|
|||||||
import com.fasterxml.jackson.databind.json.JsonMapper;
|
import com.fasterxml.jackson.databind.json.JsonMapper;
|
||||||
import com.intellij.codeInspection.InspectionEP;
|
import com.intellij.codeInspection.InspectionEP;
|
||||||
import com.intellij.codeInspection.InspectionProfileEntry;
|
import com.intellij.codeInspection.InspectionProfileEntry;
|
||||||
import com.intellij.codeInspection.ex.InspectionToolWrapper;
|
import com.intellij.codeInspection.ex.*;
|
||||||
import com.intellij.codeInspection.ex.ScopeToolState;
|
|
||||||
import com.intellij.codeInspection.options.*;
|
import com.intellij.codeInspection.options.*;
|
||||||
import com.intellij.ide.plugins.PluginManager;
|
import com.intellij.ide.plugins.PluginManager;
|
||||||
import com.intellij.inspectopedia.extractor.data.Inspection;
|
import com.intellij.inspectopedia.extractor.data.Inspection;
|
||||||
@@ -18,6 +17,7 @@ import com.intellij.inspectopedia.extractor.data.Plugin;
|
|||||||
import com.intellij.inspectopedia.extractor.data.Plugins;
|
import com.intellij.inspectopedia.extractor.data.Plugins;
|
||||||
import com.intellij.inspectopedia.extractor.utils.HtmlUtils;
|
import com.intellij.inspectopedia.extractor.utils.HtmlUtils;
|
||||||
import com.intellij.openapi.application.ApplicationInfo;
|
import com.intellij.openapi.application.ApplicationInfo;
|
||||||
|
import com.intellij.openapi.application.ApplicationManager;
|
||||||
import com.intellij.openapi.application.ApplicationStarter;
|
import com.intellij.openapi.application.ApplicationStarter;
|
||||||
import com.intellij.openapi.diagnostic.Logger;
|
import com.intellij.openapi.diagnostic.Logger;
|
||||||
import com.intellij.openapi.project.Project;
|
import com.intellij.openapi.project.Project;
|
||||||
@@ -97,6 +97,11 @@ final class InspectopediaExtractor implements ApplicationStarter {
|
|||||||
|
|
||||||
availablePlugins.put(IDE_NAME, new Plugin(IDE_NAME, IDE_NAME, IDE_VERSION));
|
availablePlugins.put(IDE_NAME, new Plugin(IDE_NAME, IDE_NAME, IDE_VERSION));
|
||||||
|
|
||||||
|
final InspectionMetaInformationService
|
||||||
|
service = ApplicationManager.getApplication().getService(InspectionMetaInformationService.class);
|
||||||
|
|
||||||
|
final MetaInformationState inspectionsExtraState = service == null ? null : (MetaInformationState)service.getState(null);
|
||||||
|
|
||||||
for (final ScopeToolState scopeToolState : scopeToolStates) {
|
for (final ScopeToolState scopeToolState : scopeToolStates) {
|
||||||
|
|
||||||
final InspectionToolWrapper<?, ?> wrapper = scopeToolState.getTool();
|
final InspectionToolWrapper<?, ?> wrapper = scopeToolState.getTool();
|
||||||
@@ -118,13 +123,16 @@ final class InspectopediaExtractor implements ApplicationStarter {
|
|||||||
catch (Throwable t) {
|
catch (Throwable t) {
|
||||||
LOG.info("Cannot create options panel " + wrapper.getShortName(), t);
|
LOG.info("Cannot create options panel " + wrapper.getShortName(), t);
|
||||||
}
|
}
|
||||||
|
final MetaInformation metaInformation = inspectionsExtraState == null ? null : inspectionsExtraState.getInspections().get(wrapper.getID());
|
||||||
|
final List<Integer> cweIds = metaInformation == null ? null : metaInformation.getCweIds();
|
||||||
|
|
||||||
final String language = wrapper.getLanguage();
|
final String language = wrapper.getLanguage();
|
||||||
final String briefDescription = HtmlUtils.cleanupHtml(description[0], language);
|
final String briefDescription = HtmlUtils.cleanupHtml(description[0], language);
|
||||||
final String extendedDescription = description.length > 1 ? HtmlUtils.cleanupHtml(description[1], language) : null;
|
final String extendedDescription = description.length > 1 ? HtmlUtils.cleanupHtml(description[1], language) : null;
|
||||||
final Inspection inspection = new Inspection(wrapper.getShortName(), wrapper.getDisplayName(), wrapper.getDefaultLevel().getName(),
|
final Inspection inspection = new Inspection(wrapper.getShortName(), wrapper.getDisplayName(), wrapper.getDefaultLevel().getName(),
|
||||||
language, briefDescription,
|
language, briefDescription,
|
||||||
extendedDescription, Arrays.asList(wrapper.getGroupPath()), wrapper.applyToDialects(),
|
extendedDescription, Arrays.asList(wrapper.getGroupPath()), wrapper.applyToDialects(),
|
||||||
wrapper.isCleanupTool(), wrapper.isEnabledByDefault(), panelInfo);
|
wrapper.isCleanupTool(), wrapper.isEnabledByDefault(), panelInfo, cweIds);
|
||||||
|
|
||||||
availablePlugins.get(pluginId).addInspection(inspection);
|
availablePlugins.get(pluginId).addInspection(inspection);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ public class Inspection implements Comparable<Inspection> {
|
|||||||
public String extendedDescription = "";
|
public String extendedDescription = "";
|
||||||
public boolean hasOptionsPanel = false;
|
public boolean hasOptionsPanel = false;
|
||||||
public List<OptionsPanelInfo> options = null;
|
public List<OptionsPanelInfo> options = null;
|
||||||
|
public List<Integer> cweIds = null;
|
||||||
|
|
||||||
public Inspection(String id,
|
public Inspection(String id,
|
||||||
String name,
|
String name,
|
||||||
@@ -38,7 +39,8 @@ public class Inspection implements Comparable<Inspection> {
|
|||||||
boolean appliesToDialects,
|
boolean appliesToDialects,
|
||||||
boolean partOfCodeCleanup,
|
boolean partOfCodeCleanup,
|
||||||
boolean enabledByDefault,
|
boolean enabledByDefault,
|
||||||
List<OptionsPanelInfo> options) {
|
List<OptionsPanelInfo> options,
|
||||||
|
List<Integer> cweIds) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.severity = severity;
|
this.severity = severity;
|
||||||
@@ -51,6 +53,7 @@ public class Inspection implements Comparable<Inspection> {
|
|||||||
this.isEnabledDefault = enabledByDefault;
|
this.isEnabledDefault = enabledByDefault;
|
||||||
this.hasOptionsPanel = options != null;
|
this.hasOptionsPanel = options != null;
|
||||||
this.options = options;
|
this.options = options;
|
||||||
|
this.cweIds = cweIds;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Inspection() {
|
public Inspection() {
|
||||||
|
|||||||
@@ -17,130 +17,129 @@ import java.util.Iterator;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public final class HtmlUtils {
|
public final class HtmlUtils {
|
||||||
public static final Safelist SAFELIST = new Safelist();
|
public static final Safelist SAFELIST = new Safelist();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
SAFELIST.addTags("a", "b", "code", "i", "li", "list", "p", "s", "u");
|
SAFELIST.addTags("a", "b", "code", "i", "li", "list", "p", "s", "u");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final List<Pair<String, String>> RENAME_MAP = List.of(
|
||||||
|
Pair.create("ul", "list"),
|
||||||
|
Pair.create("th", "td"),
|
||||||
|
Pair.create("c", "code"),
|
||||||
|
Pair.create("strong", "b"),
|
||||||
|
Pair.create("small", "font"),
|
||||||
|
Pair.create("span", "control"),
|
||||||
|
Pair.create("blockquote", "tip"),
|
||||||
|
Pair.create("em", "i")
|
||||||
|
);
|
||||||
|
|
||||||
|
private static final List<String> REMOVE_MAP = List.of(
|
||||||
|
"hr",
|
||||||
|
"br",
|
||||||
|
"code:matches(^\\s*$)"
|
||||||
|
);
|
||||||
|
|
||||||
|
private static final List<String> UNWRAP_MAP = List.of(
|
||||||
|
"tbody",
|
||||||
|
"pre",
|
||||||
|
"code[style=block] > *"
|
||||||
|
);
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
public static String cleanupHtml(@NotNull String source, @Nullable String languageForCodeBlocks) {
|
||||||
|
final Document document = Jsoup.parse(source);
|
||||||
|
|
||||||
|
RENAME_MAP.forEach(map -> document.select(map.first).tagName(map.second));
|
||||||
|
|
||||||
|
final Elements ol = document.select("ol");
|
||||||
|
ol.tagName("list");
|
||||||
|
ol.attr("style", "decimal");
|
||||||
|
|
||||||
|
UNWRAP_MAP.forEach(map -> document.select(map).unwrap());
|
||||||
|
|
||||||
|
final Elements codeBlock = document.select("pre > code");
|
||||||
|
codeBlock.attr("style", "block");
|
||||||
|
codeBlock.attr("lang", languageForCodeBlocks == null ? "Text" : languageForCodeBlocks);
|
||||||
|
|
||||||
|
document.select("code > *").stream()
|
||||||
|
.filter(element -> !element.tagName().equals("a"))
|
||||||
|
.forEach(Node::unwrap);
|
||||||
|
|
||||||
|
document.select("br").stream().map(Element::parent)
|
||||||
|
.distinct()
|
||||||
|
.forEach(parent -> {
|
||||||
|
final List<Pair<Boolean, List<Node>>> groups = new ArrayList<>();
|
||||||
|
final List<Node> inlineElements = new ArrayList<>();
|
||||||
|
final Iterator<Node> childNodes = parent.childNodes().iterator();
|
||||||
|
while (childNodes.hasNext()) {
|
||||||
|
final Node childNode = childNodes.next();
|
||||||
|
|
||||||
|
if (!isBlockElement(childNode)) {
|
||||||
|
inlineElements.add(childNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isBlockElement(childNode) || !childNodes.hasNext()) {
|
||||||
|
if (!inlineElements.isEmpty()) {
|
||||||
|
groups.add(Pair.create(true, List.copyOf(inlineElements)));
|
||||||
|
inlineElements.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isBlockElement(childNode) && !isBr(childNode)) {
|
||||||
|
groups.add(Pair.create(false, List.of(childNode)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (Pair<Boolean, List<Node>> group : groups) {
|
||||||
|
final Boolean shouldWrap = group.getFirst();
|
||||||
|
final List<Node> nodes = group.getSecond();
|
||||||
|
|
||||||
|
final Element elementForNodes = shouldWrap ? document.createElement("p") : parent;
|
||||||
|
|
||||||
|
nodes.forEach(n -> {
|
||||||
|
n.remove();
|
||||||
|
elementForNodes.appendChild(n);
|
||||||
|
});
|
||||||
|
|
||||||
|
if (shouldWrap) {
|
||||||
|
parent.appendChild(elementForNodes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
REMOVE_MAP.forEach(map -> document.select(map).remove());
|
||||||
|
|
||||||
|
Elements paragraphsWithParagraphs;
|
||||||
|
//What if there are hypothetically many nested P, and we're going to miss them with only one iteration?
|
||||||
|
do {
|
||||||
|
paragraphsWithParagraphs = document.select("p:has(p)");
|
||||||
|
paragraphsWithParagraphs.unwrap();
|
||||||
|
}
|
||||||
|
while (!paragraphsWithParagraphs.isEmpty());
|
||||||
|
//And then there were multi nested paragraphs which deep down contained nothing but whitespace? Now they're ready for removal as well :)
|
||||||
|
final Elements emptyParagraphs = document.select("p:matches(^\\s*$)");
|
||||||
|
emptyParagraphs.remove();
|
||||||
|
|
||||||
|
final Cleaner cleaner = new Cleaner(SAFELIST);
|
||||||
|
cleaner.clean(document);
|
||||||
|
|
||||||
|
document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
||||||
|
document.outputSettings().prettyPrint(false);
|
||||||
|
|
||||||
|
return document.body().html();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isBlockElement(@NotNull Node node) {
|
||||||
|
if (!(node instanceof Element element)) {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final List<Pair<String, String>> RENAME_MAP = List.of(
|
return "list".equals(element.tagName()) ||
|
||||||
Pair.create("ul", "list"),
|
("code".equals(element.tagName()) && "block".equals(element.attr("style"))) ||
|
||||||
Pair.create("th", "td"),
|
isBr(node);
|
||||||
Pair.create("c", "code"),
|
}
|
||||||
Pair.create("strong", "b"),
|
|
||||||
Pair.create("small", "font"),
|
|
||||||
Pair.create("span", "control"),
|
|
||||||
Pair.create("blockquote", "tip"),
|
|
||||||
Pair.create("em", "i")
|
|
||||||
);
|
|
||||||
|
|
||||||
private static final List<String> REMOVE_MAP = List.of(
|
private static boolean isBr(@NotNull Node node) {
|
||||||
"hr",
|
return node instanceof Element element && "br".equals(element.tagName());
|
||||||
"br",
|
}
|
||||||
"code:matches(^\\s*$)"
|
|
||||||
);
|
|
||||||
|
|
||||||
private static final List<String> UNWRAP_MAP = List.of(
|
|
||||||
"tbody",
|
|
||||||
"pre",
|
|
||||||
"code[style=block] > *"
|
|
||||||
);
|
|
||||||
|
|
||||||
@NotNull
|
|
||||||
public static String cleanupHtml(@NotNull String source, @Nullable String languageForCodeBlocks) {
|
|
||||||
final Document document = Jsoup.parse(source);
|
|
||||||
|
|
||||||
RENAME_MAP.forEach(map -> document.select(map.first).tagName(map.second));
|
|
||||||
|
|
||||||
final Elements ol = document.select("ol");
|
|
||||||
ol.tagName("list");
|
|
||||||
ol.attr("style", "decimal");
|
|
||||||
|
|
||||||
UNWRAP_MAP.forEach(map -> document.select(map).unwrap());
|
|
||||||
|
|
||||||
final Elements codeBlock = document.select("pre > code");
|
|
||||||
codeBlock.attr("style", "block");
|
|
||||||
codeBlock.attr("lang", languageForCodeBlocks == null ? "Text" : languageForCodeBlocks);
|
|
||||||
|
|
||||||
document.select("code > *").stream()
|
|
||||||
.filter(element -> !element.tagName().equals("a"))
|
|
||||||
.forEach(Node::unwrap);
|
|
||||||
|
|
||||||
document.select("br").stream().map(Element::parent)
|
|
||||||
.distinct()
|
|
||||||
.forEach(parent -> {
|
|
||||||
final List<Pair<Boolean, List<Node>>> groups = new ArrayList<>();
|
|
||||||
final List<Node> inlineElements = new ArrayList<>();
|
|
||||||
final Iterator<Node> childNodes = parent.childNodes().iterator();
|
|
||||||
while (childNodes.hasNext()) {
|
|
||||||
final Node childNode = childNodes.next();
|
|
||||||
|
|
||||||
if (!isBlockElement(childNode)) {
|
|
||||||
inlineElements.add(childNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isBlockElement(childNode) || !childNodes.hasNext()) {
|
|
||||||
if (!inlineElements.isEmpty()) {
|
|
||||||
groups.add(Pair.create(true, List.copyOf(inlineElements)));
|
|
||||||
inlineElements.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isBlockElement(childNode) && !isBr(childNode)) {
|
|
||||||
groups.add(Pair.create(false, List.of(childNode)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (Pair<Boolean, List<Node>> group : groups) {
|
|
||||||
final Boolean shouldWrap = group.getFirst();
|
|
||||||
final List<Node> nodes = group.getSecond();
|
|
||||||
|
|
||||||
final Element elementForNodes = shouldWrap ? document.createElement("p") : parent;
|
|
||||||
|
|
||||||
nodes.forEach(n -> {
|
|
||||||
n.remove();
|
|
||||||
elementForNodes.appendChild(n);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (shouldWrap) {
|
|
||||||
parent.appendChild(elementForNodes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
REMOVE_MAP.forEach(map -> document.select(map).remove());
|
|
||||||
|
|
||||||
Elements paragraphsWithParagraphs;
|
|
||||||
//What if there are hypothetically many nested P, and we're going to miss them with only one iteration?
|
|
||||||
do {
|
|
||||||
paragraphsWithParagraphs = document.select("p:has(p)");
|
|
||||||
paragraphsWithParagraphs.unwrap();
|
|
||||||
} while (!paragraphsWithParagraphs.isEmpty());
|
|
||||||
//And then there were multi nested paragraphs which deep down contained nothing but whitespace? Now they're ready for removal as well :)
|
|
||||||
final Elements emptyParagraphs = document.select("p:matches(^\\s*$)");
|
|
||||||
emptyParagraphs.remove();
|
|
||||||
|
|
||||||
final Cleaner cleaner = new Cleaner(SAFELIST);
|
|
||||||
cleaner.clean(document);
|
|
||||||
|
|
||||||
document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
|
|
||||||
document.outputSettings().prettyPrint(false);
|
|
||||||
|
|
||||||
return document.body().html();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isBlockElement(@NotNull Node node) {
|
|
||||||
if (!(node instanceof Element element))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return element.tagName().equals("list") ||
|
|
||||||
(element.tagName().equals("code") && element.attr("style").equals("block")) ||
|
|
||||||
isBr(node);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isBr(@NotNull Node node) {
|
|
||||||
if (!(node instanceof Element))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return "br".equals(((Element) node).tagName());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user