mirror of
https://gitflic.ru/project/openide/openide.git
synced 2026-03-22 15:10:43 +07:00
Optimize indexing
(cherry picked from commit fbb4040b240d2213441417118391563db60b01f0) (cherry picked from commit 960062b13e9b93b225df91e6f17c2ab68eae240f) IJ-MR-166587 GitOrigin-RevId: 39c1273d15bd4d55d7d7f3420574d61735107163
This commit is contained in:
committed by
intellij-monorepo-bot
parent
3d3a833018
commit
f378b736c1
@@ -18,6 +18,7 @@ import java.util.*
|
||||
import kotlin.io.path.extension
|
||||
import kotlin.io.path.isRegularFile
|
||||
import kotlin.io.path.name
|
||||
import kotlin.streams.asSequence
|
||||
|
||||
class HelpIndexer
|
||||
@Throws(IOException::class)
|
||||
@@ -32,58 +33,53 @@ internal constructor(indexDir: String) {
|
||||
}
|
||||
|
||||
@Throws(IOException::class)
|
||||
fun indexFileOrDirectory(fileName: String) {
|
||||
val file = Path.of(fileName)
|
||||
val queue = if (Files.isRegularFile(file)) {
|
||||
if (file.extension.lowercase(Locale.getDefault()) in setOf("htm", "html")) listOf(file)
|
||||
else return
|
||||
}
|
||||
else if (Files.isDirectory(file)) {
|
||||
Files.walk(file).use { stream ->
|
||||
stream
|
||||
.filter { it.isRegularFile() }
|
||||
.filter { it.extension.lowercase(Locale.getDefault()) in setOf("htm", "html") }
|
||||
.toList()
|
||||
}
|
||||
}
|
||||
else return
|
||||
|
||||
for (f in queue) {
|
||||
try {
|
||||
val doc = Document()
|
||||
val parsedDocument = Jsoup.parse(f, "UTF-8")
|
||||
|
||||
val content = StringBuilder()
|
||||
val lineSeparator = System.lineSeparator()
|
||||
val articles = parsedDocument.body().getElementsByClass("article")
|
||||
val title = parsedDocument.title()
|
||||
if (articles.isEmpty()) {
|
||||
if (title.contains("You will be redirected shortly")) {
|
||||
println("Skipping redirect page: $f ")
|
||||
}
|
||||
else if (parsedDocument.body().attr("data-template") == "section-page") {
|
||||
println("Skipping section page: $f")
|
||||
}
|
||||
else {
|
||||
System.err.println("Could not add: $f because no `<article>` found. Title is '$title'")
|
||||
}
|
||||
continue
|
||||
fun indexDirectory(dirName: String) {
|
||||
//It's always a directory, there is no chance that the method is called on a file
|
||||
Files.walk(Path.of(dirName)).use { stream ->
|
||||
stream
|
||||
.filter {
|
||||
it.isRegularFile() &&
|
||||
it.extension.lowercase(Locale.getDefault()) in setOf("htm", "html")
|
||||
}
|
||||
@Suppress("SpellCheckingInspection")
|
||||
articles[0].children()
|
||||
.filterNot { it.hasAttr("data-swiftype-index") }
|
||||
.forEach { content.append(it.text()).append(lineSeparator) }
|
||||
.asSequence()
|
||||
.forEach { file ->
|
||||
try {
|
||||
val docIndex = Document()
|
||||
val parsedDocument = Jsoup.parse(file, "UTF-8")
|
||||
|
||||
doc.add(TextField("contents", content.toString(), Field.Store.YES))
|
||||
doc.add(StringField("filename", f.name, Field.Store.YES))
|
||||
doc.add(StringField("title", title, Field.Store.YES))
|
||||
val content = StringBuilder()
|
||||
val lineSeparator = System.lineSeparator()
|
||||
val articles = parsedDocument.body().getElementsByClass("article")
|
||||
val title = parsedDocument.title()
|
||||
|
||||
writer.addDocument(doc)
|
||||
println("Added: $f")
|
||||
}
|
||||
catch (e: Throwable) {
|
||||
System.err.println("Could not add: $f because ${e.message}")
|
||||
}
|
||||
if (articles.isEmpty()) {
|
||||
if (parsedDocument.select("meta[http-equiv=refresh]").isNotEmpty() ||
|
||||
title.contains("You will be redirected shortly")) {
|
||||
println("Skipping redirect page: $file ")
|
||||
}
|
||||
else if (parsedDocument.body().attr("data-template") == "section-page") {
|
||||
println("Skipping section page: $file")
|
||||
}
|
||||
else {
|
||||
System.err.println("Could not add: $file because no `<article>` elements are found. Title is '$title'")
|
||||
}
|
||||
}
|
||||
else {
|
||||
articles.first()?.children()
|
||||
?.forEach { content.append(it.text()).append(lineSeparator) }
|
||||
|
||||
docIndex.add(TextField("contents", content.toString(), Field.Store.YES))
|
||||
docIndex.add(StringField("filename", file.name, Field.Store.YES))
|
||||
docIndex.add(StringField("title", title, Field.Store.YES))
|
||||
|
||||
writer.addDocument(docIndex)
|
||||
println("Added: $file")
|
||||
}
|
||||
}
|
||||
catch (e: Throwable) {
|
||||
System.err.println("Could not add: $file because ${e.message}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -97,7 +93,7 @@ internal constructor(indexDir: String) {
|
||||
|
||||
private fun doIndex(dirToStore: String, dirToIndex: String) {
|
||||
val indexer = HelpIndexer(dirToStore)
|
||||
indexer.indexFileOrDirectory(dirToIndex)
|
||||
indexer.indexDirectory(dirToIndex)
|
||||
indexer.closeIndex()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user