No need to remove analytics code anymore: new artifacts no longer contain it

(cherry picked from commit 5951100748601f0ddd9459736abe28d993f09ab7)


(cherry picked from commit 08e48fb8be34da5fc20a8f5b1bc9f1d8d8d5d28c)

IJ-MR-166587

GitOrigin-RevId: 52c7ba7392009f161d14b31e8c0ece494404a67f
This commit is contained in:
Egor Malyshev
2025-05-13 17:34:17 +02:00
committed by intellij-monorepo-bot
parent 772a479df8
commit 3d3a833018

View File

@@ -1,4 +1,4 @@
// Copyright 2000-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2025 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.jetbrains.builtInHelp.indexer
import org.apache.lucene.analysis.standard.StandardAnalyzer
@@ -10,19 +10,20 @@ import org.apache.lucene.index.IndexWriter
import org.apache.lucene.index.IndexWriterConfig
import org.apache.lucene.store.FSDirectory
import org.jsoup.Jsoup
import java.io.File
import java.io.IOException
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
import java.util.Locale
import kotlin.collections.ArrayList
import java.util.*
import kotlin.io.path.extension
import kotlin.io.path.isRegularFile
import kotlin.io.path.name
class HelpIndexer
@Throws(IOException::class)
internal constructor(indexDir: String) {
private val writer: IndexWriter
private val queue = ArrayList<File>()
init {
val dir = FSDirectory.open(Paths.get(indexDir))
@@ -32,8 +33,20 @@ internal constructor(indexDir: String) {
@Throws(IOException::class)
fun indexFileOrDirectory(fileName: String) {
addFiles(File(fileName))
val file = Path.of(fileName)
val queue = if (Files.isRegularFile(file)) {
if (file.extension.lowercase(Locale.getDefault()) in setOf("htm", "html")) listOf(file)
else return
}
else if (Files.isDirectory(file)) {
Files.walk(file).use { stream ->
stream
.filter { it.isRegularFile() }
.filter { it.extension.lowercase(Locale.getDefault()) in setOf("htm", "html") }
.toList()
}
}
else return
for (f in queue) {
try {
@@ -42,35 +55,34 @@ internal constructor(indexDir: String) {
val content = StringBuilder()
val lineSeparator = System.lineSeparator()
parsedDocument.body().getElementsByClass("article")[0].children()
val articles = parsedDocument.body().getElementsByClass("article")
val title = parsedDocument.title()
if (articles.isEmpty()) {
if (title.contains("You will be redirected shortly")) {
println("Skipping redirect page: $f ")
}
else if (parsedDocument.body().attr("data-template") == "section-page") {
println("Skipping section page: $f")
}
else {
System.err.println("Could not add: $f because no `<article>` found. Title is '$title'")
}
continue
}
@Suppress("SpellCheckingInspection")
articles[0].children()
.filterNot { it.hasAttr("data-swiftype-index") }
.forEach { content.append(it.text()).append(lineSeparator) }
doc.add(TextField("contents", content.toString(), Field.Store.YES))
doc.add(StringField("filename", f.name, Field.Store.YES))
doc.add(StringField("title", parsedDocument.title(), Field.Store.YES))
doc.add(StringField("title", title, Field.Store.YES))
writer.addDocument(doc)
println("Added: $f")
}
catch (e: Throwable) {
println("Could not add: $f because ${e.message}")
}
}
queue.clear()
}
private fun addFiles(file: File) {
if (file.isDirectory) {
val files = file.listFiles() ?: emptyArray()
for (f in files) {
addFiles(f)
}
}
else {
val filename = file.name.lowercase(Locale.getDefault())
if (filename.endsWith(".htm") || filename.endsWith(".html")) {
queue.add(file)
System.err.println("Could not add: $f because ${e.message}")
}
}
}
@@ -92,26 +104,6 @@ internal constructor(indexDir: String) {
@Throws(IOException::class)
@JvmStatic
fun main(args: Array<String>) {
val tokens = listOf(
"<noscript><iframe src=\"//www.googletagmanager.com/ns.html?id=GTM-5P98\" height=\"0\" width=\"0\" style=\"display:none;visibility:hidden\"></iframe></noscript>",
"</script><script src=\"/help/app/v2/analytics.js\"></script>",
"<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':",
"new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],",
"j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=",
"'//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);",
"})(window,document,'script','dataLayer','GTM-5P98');")
val files = Paths.get(args[1]).toFile().listFiles() ?: emptyArray()
files.filter { it.extension == "html" }.forEach {
var contents = String(Files.readAllBytes(it.toPath()), Charsets.UTF_8)
println("Removing analytics code from ${it.name}")
for (token in tokens) {
contents = contents.replace(token, "")
}
contents = contents.replace("//resources.jetbrains.com/storage/help-app/", "/help/")
Files.write(it.toPath(), contents.toByteArray(Charsets.UTF_8))
}
doIndex(args[0], args[1])
}
}