From ee1e0ca922a298e222fef1b0c49217010ffcd67e Mon Sep 17 00:00:00 2001 From: Egor Malyshev Date: Tue, 13 May 2025 18:03:12 +0200 Subject: [PATCH] Live fast, fail early (cherry picked from commit b8c0898472318064ee7fb073d9e50e1f3ca7643d) (cherry picked from commit a0affcaafb7bd3740275f78aa7a3f0c3fb7e791c) IJ-MR-166587 GitOrigin-RevId: 5f4aa52595da40711dfffa2a1a6b3e6ab7f71409 --- .../builtInHelp/indexer/HelpIndexer.kt | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/plugins/built-in-help/src/com/jetbrains/builtInHelp/indexer/HelpIndexer.kt b/plugins/built-in-help/src/com/jetbrains/builtInHelp/indexer/HelpIndexer.kt index e4d1850a150f..5704e25c8e5c 100644 --- a/plugins/built-in-help/src/com/jetbrains/builtInHelp/indexer/HelpIndexer.kt +++ b/plugins/built-in-help/src/com/jetbrains/builtInHelp/indexer/HelpIndexer.kt @@ -35,11 +35,14 @@ internal constructor(indexDir: String) { @Throws(IOException::class) fun indexDirectory(dirName: String) { //It's always a directory, there is no chance that the method is called on a file + val lineSeparator = System.lineSeparator() + val acceptedFiles = setOf("htm", "html") + Files.walk(Path.of(dirName)).use { stream -> stream .filter { it.isRegularFile() && - it.extension.lowercase(Locale.getDefault()) in setOf("htm", "html") + it.extension.lowercase(Locale.getDefault()) in acceptedFiles } .asSequence() .forEach { file -> @@ -47,34 +50,32 @@ internal constructor(indexDir: String) { val docIndex = Document() val parsedDocument = Jsoup.parse(file, "UTF-8") + if (parsedDocument.select("meta[http-equiv=refresh]").isNotEmpty()) { + println("Skipping redirect page: $file ") + return@forEach + } + + if (parsedDocument.body().attr("data-template") == "section-page") { + println("Skipping section page: $file") + return@forEach + } + val content = StringBuilder() - val lineSeparator = System.lineSeparator() - val articles = parsedDocument.body().getElementsByClass("article") - val title = 
parsedDocument.title() + val article = parsedDocument.body().getElementsByClass("article").first() - if (articles.isEmpty()) { - if (parsedDocument.select("meta[http-equiv=refresh]").isNotEmpty() || - title.contains("You will be redirected shortly")) { - println("Skipping redirect page: $file ") - } - else if (parsedDocument.body().attr("data-template") == "section-page") { - println("Skipping section page: $file") - } - else { - System.err.println("Could not add: $file because no `<article>` elements are found. Title is '$title'") - } + if (article == null) { + println("Skipping: $file because no `<article>` elements are found.") + return@forEach } - else { - articles.first()?.children() - ?.forEach { content.append(it.text()).append(lineSeparator) } - docIndex.add(TextField("contents", content.toString(), Field.Store.YES)) - docIndex.add(StringField("filename", file.name, Field.Store.YES)) - docIndex.add(StringField("title", title, Field.Store.YES)) + article.children().forEach { content.append(it.text()).append(lineSeparator) } - writer.addDocument(docIndex) - println("Added: $file") - } + docIndex.add(TextField("contents", content.toString(), Field.Store.YES)) + docIndex.add(StringField("filename", file.name, Field.Store.YES)) + docIndex.add(StringField("title", parsedDocument.title(), Field.Store.YES)) + + writer.addDocument(docIndex) + println("Added: $file") } catch (e: Throwable) { System.err.println("Could not add: $file because ${e.message}")