mirror of
https://gitflic.ru/project/openide/openide.git
synced 2026-03-22 15:10:43 +07:00
Live fast, fail early
(cherry picked from commit b8c0898472318064ee7fb073d9e50e1f3ca7643d) (cherry picked from commit a0affcaafb7bd3740275f78aa7a3f0c3fb7e791c) IJ-MR-166587 GitOrigin-RevId: 5f4aa52595da40711dfffa2a1a6b3e6ab7f71409
This commit is contained in:
committed by
intellij-monorepo-bot
parent
f378b736c1
commit
ee1e0ca922
@@ -35,11 +35,14 @@ internal constructor(indexDir: String) {
|
||||
@Throws(IOException::class)
|
||||
fun indexDirectory(dirName: String) {
|
||||
//It's always a directory, there is no chance that the method is called on a file
|
||||
val lineSeparator = System.lineSeparator()
|
||||
val acceptedFiles = setOf("htm", "html")
|
||||
|
||||
Files.walk(Path.of(dirName)).use { stream ->
|
||||
stream
|
||||
.filter {
|
||||
it.isRegularFile() &&
|
||||
it.extension.lowercase(Locale.getDefault()) in setOf("htm", "html")
|
||||
it.extension.lowercase(Locale.getDefault()) in acceptedFiles
|
||||
}
|
||||
.asSequence()
|
||||
.forEach { file ->
|
||||
@@ -47,34 +50,32 @@ internal constructor(indexDir: String) {
|
||||
val docIndex = Document()
|
||||
val parsedDocument = Jsoup.parse(file, "UTF-8")
|
||||
|
||||
if (parsedDocument.select("meta[http-equiv=refresh]").isNotEmpty()) {
|
||||
println("Skipping redirect page: $file ")
|
||||
return@forEach
|
||||
}
|
||||
|
||||
if (parsedDocument.body().attr("data-template") == "section-page") {
|
||||
println("Skipping section page: $file")
|
||||
return@forEach
|
||||
}
|
||||
|
||||
val content = StringBuilder()
|
||||
val lineSeparator = System.lineSeparator()
|
||||
val articles = parsedDocument.body().getElementsByClass("article")
|
||||
val title = parsedDocument.title()
|
||||
val article = parsedDocument.body().getElementsByClass("article").first()
|
||||
|
||||
if (articles.isEmpty()) {
|
||||
if (parsedDocument.select("meta[http-equiv=refresh]").isNotEmpty() ||
|
||||
title.contains("You will be redirected shortly")) {
|
||||
println("Skipping redirect page: $file ")
|
||||
}
|
||||
else if (parsedDocument.body().attr("data-template") == "section-page") {
|
||||
println("Skipping section page: $file")
|
||||
}
|
||||
else {
|
||||
System.err.println("Could not add: $file because no `<article>` elements are found. Title is '$title'")
|
||||
}
|
||||
if (article == null) {
|
||||
println("Skipping: $file because no `<article>` elements are found.")
|
||||
return@forEach
|
||||
}
|
||||
else {
|
||||
articles.first()?.children()
|
||||
?.forEach { content.append(it.text()).append(lineSeparator) }
|
||||
|
||||
docIndex.add(TextField("contents", content.toString(), Field.Store.YES))
|
||||
docIndex.add(StringField("filename", file.name, Field.Store.YES))
|
||||
docIndex.add(StringField("title", title, Field.Store.YES))
|
||||
article.children().forEach { content.append(it.text()).append(lineSeparator) }
|
||||
|
||||
writer.addDocument(docIndex)
|
||||
println("Added: $file")
|
||||
}
|
||||
docIndex.add(TextField("contents", content.toString(), Field.Store.YES))
|
||||
docIndex.add(StringField("filename", file.name, Field.Store.YES))
|
||||
docIndex.add(StringField("title", parsedDocument.title(), Field.Store.YES))
|
||||
|
||||
writer.addDocument(docIndex)
|
||||
println("Added: $file")
|
||||
}
|
||||
catch (e: Throwable) {
|
||||
System.err.println("Could not add: $file because ${e.message}")
|
||||
|
||||
Reference in New Issue
Block a user