IJPL-149042 Don't save file id in index

GitOrigin-RevId: 2d4cd5409d1cbe510c3449acd1e78f85fb6abcc0
This commit is contained in:
Liudmila Kornilova
2024-09-25 18:58:01 +02:00
committed by intellij-monorepo-bot
parent eacaee3f73
commit 630ddec989
3 changed files with 25 additions and 27 deletions

View File

@@ -17,6 +17,7 @@ import com.intellij.openapi.util.Disposer
import com.intellij.openapi.util.registry.Registry
import com.intellij.openapi.vfs.VirtualFile
import com.intellij.openapi.vfs.VirtualFileManager
import com.intellij.openapi.vfs.VirtualFileWithId
import com.intellij.openapi.vfs.isFile
import com.intellij.platform.diagnostic.telemetry.helpers.useWithScope
import com.intellij.platform.ide.progress.withBackgroundProgress
@@ -240,7 +241,7 @@ class FileBasedEmbeddingIndexer(private val cs: CoroutineScope) : Disposable {
private suspend fun fetchEntities(indexId: ID<EmbeddingKey, String>,
channel: Channel<IndexableEntity>,
project: Project,
nameToEntity: (EmbeddingKey, String) -> LongIndexableEntity) {
nameToEntity: (Long, String) -> LongIndexableEntity) {
val fileBasedIndex = FileBasedIndex.getInstance()
val scope = GlobalSearchScope.projectScope(project)
val keys = smartReadAction(project) { fileBasedIndex.getAllKeys(indexId, project) }
@@ -248,9 +249,18 @@ class FileBasedEmbeddingIndexer(private val cs: CoroutineScope) : Disposable {
val chunkSize = Registry.intValue("intellij.platform.ml.embeddings.file.based.index.processing.chunk.size")
keys.asSequence().chunked(chunkSize).forEach { chunk ->
chunk.forEach { key ->
val names = smartReadAction(project) { fileBasedIndex.getValues(indexId, key, scope) }
for (name in names) {
channel.send(nameToEntity(key, name))
val fileIdsAndNames = smartReadAction(project) {
val result = mutableListOf<Pair<Int, String>>()
fileBasedIndex.processValues(indexId, key, null, { virtualFile, name ->
if (virtualFile is VirtualFileWithId) {
result.add(Pair(virtualFile.id, name))
}
true
}, scope)
result
}
for ((fileId, name) in fileIdsAndNames) {
channel.send(nameToEntity(key.toLong(fileId), name))
}
}
}

View File

@@ -3,7 +3,6 @@ package com.intellij.platform.ml.embeddings.indexer
import com.intellij.openapi.fileTypes.FileType
import com.intellij.openapi.vfs.VirtualFile
import com.intellij.openapi.vfs.VirtualFileWithId
import com.intellij.psi.PsiFile
import com.intellij.util.indexing.*
import com.intellij.util.io.DataExternalizer
@@ -11,24 +10,16 @@ import com.intellij.util.io.DataInputOutputUtil
import com.intellij.util.io.KeyDescriptor
import java.io.DataInput
import java.io.DataOutput
import java.util.*
internal val CLASS_NAME_EMBEDDING_INDEX_NAME = ID.create<EmbeddingKey, String>("ClassNameEmbeddingIndex")
internal val SYMBOL_NAME_EMBEDDING_INDEX_NAME = ID.create<EmbeddingKey, String>("SymbolNameEmbeddingIndex")
internal class EmbeddingKey(val fileId: Int, val indexableRepresentationHashCode: Int) {
override fun hashCode(): Int = Objects.hash(fileId, indexableRepresentationHashCode)
override fun equals(other: Any?): Boolean =
other is EmbeddingKey && other.fileId == fileId && other.indexableRepresentationHashCode == indexableRepresentationHashCode
internal class EmbeddingKey(val textHashCode: Int) {
override fun hashCode(): Int = textHashCode.hashCode()
override fun equals(other: Any?): Boolean = other is EmbeddingKey && other.textHashCode == textHashCode
fun toLong(): Long {
return (fileId.toLong() shl 32) + indexableRepresentationHashCode.toLong()
}
companion object {
fun fromLong(v: Long): EmbeddingKey {
return EmbeddingKey((v shr 32).toInt(), v.toInt())
}
/**
 * Packs [fileId] and this key's `textHashCode` into a single `Long`:
 * [fileId] fills the upper 32 bits and the hash fills the lower 32 bits.
 *
 * @param fileId numeric id of a file (for example `VirtualFileWithId.id`)
 * @return the combined 64-bit value
 */
fun toLong(fileId: Int): Long {
  // A negative Int sign-extends when widened to Long. Adding it unmasked would borrow from the
  // upper half and leave `fileId - 1` there, so the hash is first reduced to its unsigned 32-bit form.
  val lowerBits = textHashCode.toLong() and 0xFFFFFFFFL
  return (fileId.toLong() shl 32) or lowerBits
}
}
@@ -40,7 +31,7 @@ internal class ClassNameEmbeddingIndex : BaseEmbeddingIndex() {
get() = ClassesProvider.supportedFileTypes
override fun getName(): ID<EmbeddingKey, String> = CLASS_NAME_EMBEDDING_INDEX_NAME
override fun getVersion(): Int = 0
override fun getVersion(): Int = 1
/**
 * Builds one [IndexingItem] for every entry that `ClassesProvider.extractClasses` returns for [psiFile],
 * using that entry's `id.id` as the item content.
 */
override fun index(psiFile: PsiFile): List<IndexingItem> =
  ClassesProvider.extractClasses(psiFile).map { classEntity -> IndexingItem(classEntity.id.id) }
@@ -51,7 +42,7 @@ internal class SymbolNameEmbeddingIndex : BaseEmbeddingIndex() {
get() = SymbolsProvider.supportedFileTypes
override fun getName(): ID<EmbeddingKey, String> = SYMBOL_NAME_EMBEDDING_INDEX_NAME
override fun getVersion(): Int = 0
override fun getVersion(): Int = 1
/**
 * Builds one [IndexingItem] for every entry that `SymbolsProvider.extractSymbols` returns for [psiFile],
 * using that entry's `id.id` as the item content.
 */
override fun index(psiFile: PsiFile): List<IndexingItem> =
  SymbolsProvider.extractSymbols(psiFile).map { symbolEntity -> IndexingItem(symbolEntity.id.id) }
@@ -75,10 +66,9 @@ internal abstract class BaseEmbeddingIndex() : FileBasedIndexExtension<Embedding
override fun getIndexer(): DataIndexer<EmbeddingKey, String, FileContent> {
return DataIndexer { inputData ->
val id = (inputData.file as? VirtualFileWithId)?.id ?: return@DataIndexer emptyMap()
index(inputData.psiFile).associate { item ->
val textHashcode = item.text.hashCode()
EmbeddingKey(id, textHashcode) to item.text
EmbeddingKey(textHashcode) to item.text
}
}
}
@@ -91,8 +81,8 @@ internal abstract class BaseEmbeddingIndex() : FileBasedIndexExtension<Embedding
return object : KeyDescriptor<EmbeddingKey> {
override fun getHashCode(value: EmbeddingKey): Int = value.hashCode()
override fun isEqual(val1: EmbeddingKey, val2: EmbeddingKey): Boolean = val1 == val2
override fun save(out: DataOutput, value: EmbeddingKey) = DataInputOutputUtil.writeLONG(out, value.toLong())
override fun read(`in`: DataInput): EmbeddingKey = EmbeddingKey.fromLong(DataInputOutputUtil.readLONG(`in`))
override fun save(out: DataOutput, value: EmbeddingKey) = DataInputOutputUtil.writeINT(out, value.textHashCode)
override fun read(`in`: DataInput): EmbeddingKey = EmbeddingKey(DataInputOutputUtil.readINT(`in`))
}
}

View File

@@ -1,8 +1,6 @@
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.platform.ml.embeddings.indexer.keys
import com.intellij.openapi.application.readAction
import com.intellij.openapi.application.runReadAction
import com.intellij.openapi.application.smartReadAction
import com.intellij.openapi.components.Service
import com.intellij.openapi.components.service
@@ -45,7 +43,7 @@ internal class IndexLongKeyProvider : EmbeddingStorageKeyProvider<Long> {
smartReadAction(project) {
FileBasedIndex.getInstance().processValues(
/* indexId = */ index,
/* dataKey = */ EmbeddingKey.fromLong(key),
/* dataKey = */ EmbeddingKey(hash),
/* inFile = */ file,
/* processor = */ FileBasedIndex.ValueProcessor { _, value -> result = value; false },
/* filter = */ GlobalSearchScope.fileScope(project, file))