IJPL-149042 refactor EmbeddingsConfiguration registration

EmbeddingsConfigurations are now registered in plugin.xml, one entry per indexId.

GitOrigin-RevId: 25a345503c681a1c0e1b8ede90c4a4b51759ec80
This commit is contained in:
Max Medvedev
2024-09-27 14:02:33 +02:00
committed by intellij-monorepo-bot
parent 24dc4fd27b
commit cc7687562d
8 changed files with 155 additions and 41 deletions

View File

@@ -73,8 +73,11 @@
<extensionPoint
qualifiedName="com.intellij.platform.ml.embeddings.textEmbeddingsConfiguration"
interface="com.intellij.platform.ml.embeddings.indexer.configuration.EmbeddingsConfiguration"
dynamic="true"/>
beanClass="com.intellij.platform.ml.embeddings.indexer.configuration.EmbeddingsConfigurationBean"
dynamic="true"
>
<with attribute="implementation" implements="com.intellij.platform.ml.embeddings.indexer.configuration.EmbeddingsConfiguration"/>
</extensionPoint>
<extensionPoint name="embeddings.indexer.classesProvider" beanClass="com.intellij.openapi.fileTypes.FileTypeExtensionPoint" dynamic="true">
<with attribute="implementationClass" implements="com.intellij.platform.ml.embeddings.indexer.ClassesProvider"/>
@@ -84,13 +87,63 @@
</extensionPoint>
</extensionPoints>
<extensions defaultExtensionNs="com.intellij.platform.ml.embeddings">
<!--
  One textEmbeddingsConfiguration entry per (implementation, indexId) pair.
  EmbeddingsConfiguration.getStorageManagerWrapper(indexId) takes the first entry, in extension order,
  whose indexId matches and whose implementation reports isEnabled().
-->
<!-- In-process configuration, registered for all four indexes. -->
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.InProcessEmbeddingsConfiguration"
indexId="actions"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.InProcessEmbeddingsConfiguration"
indexId="classes"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.InProcessEmbeddingsConfiguration"
indexId="symbols"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.InProcessEmbeddingsConfiguration"
indexId="files"
/>
<!--
  File-based-index variant of the native server configuration. Each entry uses `order` to be placed
  ahead of the plain native-server entry for the same index. No entry is registered for "actions".
-->
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerFileBasedIndexEmbeddingsConfiguration"
indexId="classes"
order="before NativeServerEmbeddingsConfigurationClasses"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerFileBasedIndexEmbeddingsConfiguration"
indexId="symbols"
order="before NativeServerEmbeddingsConfigurationSymbols"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerFileBasedIndexEmbeddingsConfiguration"
indexId="files"
order="before NativeServerEmbeddingsConfigurationFiles"
/>
<!--
  Plain native-server configuration. The ids of the classes, symbols and files entries are the
  targets of the `order` attributes above.
-->
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerEmbeddingsConfiguration"
indexId="actions"
id = "NativeServerEmbeddingsConfigurationActions"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerEmbeddingsConfiguration"
indexId="classes"
id = "NativeServerEmbeddingsConfigurationClasses"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerEmbeddingsConfiguration"
indexId="symbols"
id = "NativeServerEmbeddingsConfigurationSymbols"
/>
<textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerEmbeddingsConfiguration"
indexId="files"
id = "NativeServerEmbeddingsConfigurationFiles"
/>
</extensions>
<extensions defaultExtensionNs="com.intellij">
<platform.ml.embeddings.textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.InProcessEmbeddingsConfiguration"/>
<platform.ml.embeddings.textEmbeddingsConfiguration
implementation="com.intellij.platform.ml.embeddings.indexer.configuration.NativeServerEmbeddingsConfiguration"/>
<fileBasedIndex implementation="com.intellij.platform.ml.embeddings.indexer.FileNameEmbeddingIndex"/>
<fileBasedIndex implementation="com.intellij.platform.ml.embeddings.indexer.ClassNameEmbeddingIndex"/>
<fileBasedIndex implementation="com.intellij.platform.ml.embeddings.indexer.SymbolNameEmbeddingIndex"/>

View File

@@ -24,6 +24,7 @@ import com.intellij.platform.ml.embeddings.EmbeddingsBundle
import com.intellij.platform.ml.embeddings.files.SemanticSearchFileChangeListener
import com.intellij.platform.ml.embeddings.indexer.configuration.EmbeddingsConfiguration
import com.intellij.platform.ml.embeddings.indexer.entities.*
import com.intellij.platform.ml.embeddings.indexer.storage.EmbeddingsStorageManagerWrapper
import com.intellij.platform.ml.embeddings.jvm.indices.EntityId
import com.intellij.platform.ml.embeddings.logging.EmbeddingSearchLogger
import com.intellij.platform.ml.embeddings.settings.EmbeddingIndexSettings
@@ -56,7 +57,11 @@ class FileBasedEmbeddingIndexer(private val cs: CoroutineScope) : Disposable {
private val indexingJobs = mutableMapOf<Project, Job>()
private val jobsMutex = Mutex()
private val storageManagerWrapper = EmbeddingsConfiguration.getStorageManagerWrapper()
private val storageManagerWrappers = buildMap {
for (indexId in FILE_BASED_INDICES) {
put(indexId, EmbeddingsConfiguration.getStorageManagerWrapper(indexId))
}
}
private val filesLimit: Int?
get() {
@@ -162,16 +167,17 @@ class FileBasedEmbeddingIndexer(private val cs: CoroutineScope) : Disposable {
/**
 * Drains [channel] and passes the received entities to the storage manager wrapper for [indexId]
 * in batches of up to BATCH_SIZE via `addAbsent`.
 */
suspend fun sendEntities(indexId: IndexId, channel: ReceiveChannel<IndexableEntity>) {
// A single buffer is reused: it grows while the first batch fills, afterwards slots are overwritten in place.
val entities = ArrayList<IndexableEntity>(BATCH_SIZE)
// Position of the next slot to write within the current batch.
var index = 0
val wrapper = getStorageManagerWrapper(indexId)
for (entity in channel) {
if (entities.size < BATCH_SIZE) entities.add(entity) else entities[index] = entity
++index
if (index == BATCH_SIZE) {
// Batch is full: flush it and start overwriting from slot 0.
storageManagerWrapper.addAbsent(project, indexId, entities)
wrapper.addAbsent(project, entities)
index = 0
}
}
// NOTE(review): this passes the whole buffer, not only the first `index` slots. Once a full batch has
// been flushed the buffer keeps BATCH_SIZE items, so this final call appears to resend entries of the
// previous batch (or the entire last batch when the count is an exact multiple) — confirm addAbsent tolerates that.
if (entities.isNotEmpty()) {
storageManagerWrapper.addAbsent(project, indexId, entities)
wrapper.addAbsent(project, entities)
}
}
@@ -305,16 +311,20 @@ class FileBasedEmbeddingIndexer(private val cs: CoroutineScope) : Disposable {
/** Starts an indexing session for [project] on every index listed in FILE_BASED_INDICES. */
private suspend fun startIndexingSession(project: Project) {
for (indexId in FILE_BASED_INDICES) {
storageManagerWrapper.startIndexingSession(project, indexId)
getStorageManagerWrapper(indexId).startIndexingSession(project)
}
}
/** Finishes the indexing session for [project] on every index listed in FILE_BASED_INDICES. */
private suspend fun finishIndexingSession(project: Project) {
for (indexId in FILE_BASED_INDICES) {
storageManagerWrapper.finishIndexingSession(project, indexId)
getStorageManagerWrapper(indexId).finishIndexingSession(project)
}
}
/**
 * Looks up the storage manager wrapper held in [storageManagerWrappers] for [indexId].
 *
 * @throws IllegalArgumentException if no wrapper is stored for [indexId].
 */
private fun getStorageManagerWrapper(indexId: IndexId): EmbeddingsStorageManagerWrapper<*> =
  requireNotNull(storageManagerWrappers[indexId]) { "$indexId is not supported for file-based indexing" }
companion object {
fun getInstance(): FileBasedEmbeddingIndexer = service()

View File

@@ -2,11 +2,21 @@
package com.intellij.platform.ml.embeddings.indexer.configuration
import com.intellij.openapi.extensions.ExtensionPointName
import com.intellij.openapi.extensions.RequiredElement
import com.intellij.platform.ml.embeddings.indexer.IndexId
import com.intellij.platform.ml.embeddings.indexer.keys.EmbeddingStorageKeyProvider
import com.intellij.platform.ml.embeddings.indexer.storage.EmbeddingsStorageManagerWrapper
import com.intellij.platform.ml.embeddings.indexer.storage.TextEmbeddingsStorageManager
import com.intellij.serviceContainer.BaseKeyedLazyInstance
import com.intellij.util.xmlb.annotations.Attribute
import org.jetbrains.annotations.ApiStatus
/**
* EmbeddingsConfiguration defines the storage manager and key provider used for an embeddings index.
* Use [getStorageManagerWrapper] to obtain the corresponding instance of [EmbeddingsStorageManagerWrapper]
* and perform a search in a given index.
* Different indexes can use different storages and key providers.
*/
@ApiStatus.OverrideOnly
interface EmbeddingsConfiguration<KeyT> {
fun getStorageManager(): TextEmbeddingsStorageManager<KeyT>
@@ -16,21 +26,35 @@ interface EmbeddingsConfiguration<KeyT> {
fun isEnabled(): Boolean
companion object {
private val EP_NAME: ExtensionPointName<EmbeddingsConfiguration<*>> = ExtensionPointName.create(
private val EP_NAME: ExtensionPointName<EmbeddingsConfigurationBean> = ExtensionPointName.create(
"com.intellij.platform.ml.embeddings.textEmbeddingsConfiguration")
fun getStorageManagerWrapper(): EmbeddingsStorageManagerWrapper<*> {
val instance = EP_NAME.extensionList.first {
it.isEnabled()
fun getStorageManagerWrapper(indexId: IndexId): EmbeddingsStorageManagerWrapper<*> {
val bean = EP_NAME.extensionList.first {
it.indexId == indexId &&
it.instance.isEnabled()
}
return instance.toStorageManagerWrapper()
val instance = bean.instance
return instance.toStorageManagerWrapper(indexId)
}
private fun <KeyT> EmbeddingsConfiguration<KeyT>.toStorageManagerWrapper(): EmbeddingsStorageManagerWrapper<KeyT> {
private fun <KeyT> EmbeddingsConfiguration<KeyT>.toStorageManagerWrapper(indexId: IndexId): EmbeddingsStorageManagerWrapper<KeyT> {
val storageManager = getStorageManager()
val keyProvider = getKeyProvider()
return EmbeddingsStorageManagerWrapper(storageManager, keyProvider)
return EmbeddingsStorageManagerWrapper(indexId, storageManager, keyProvider)
}
}
}
/**
 * Bean for the `com.intellij.platform.ml.embeddings.textEmbeddingsConfiguration` extension point.
 *
 * Each registration names an [EmbeddingsConfiguration] implementation class and the [IndexId] it applies to.
 * The configuration object itself is obtained through the inherited `instance` property.
 */
internal class EmbeddingsConfigurationBean : BaseKeyedLazyInstance<EmbeddingsConfiguration<*>>() {
// Class name of the EmbeddingsConfiguration implementation, read from the "implementation" attribute.
@RequiredElement
@Attribute("implementation")
lateinit var implementation: String
// Index this registration applies to, read from the "indexId" attribute.
@RequiredElement
@Attribute(value = "indexId")
lateinit var indexId: IndexId
// Tells the base class which class to use for the instance.
override fun getImplementationClassName(): String? = implementation
}

View File

@@ -3,7 +3,6 @@ package com.intellij.platform.ml.embeddings.indexer.configuration
import com.intellij.openapi.util.registry.Registry
import com.intellij.platform.ml.embeddings.indexer.keys.EmbeddingStorageKeyProvider
import com.intellij.platform.ml.embeddings.indexer.keys.IndexLongKeyProvider
import com.intellij.platform.ml.embeddings.indexer.keys.IntegerStorageKeyProvider
import com.intellij.platform.ml.embeddings.indexer.storage.NativeServerTextEmbeddingsStorageManager
import com.intellij.platform.ml.embeddings.indexer.storage.TextEmbeddingsStorageManager
@@ -14,12 +13,7 @@ class NativeServerEmbeddingsConfiguration: EmbeddingsConfiguration<Long> {
}
override fun getKeyProvider(): EmbeddingStorageKeyProvider<Long> {
return if (Registry.`is`("intellij.platform.ml.embeddings.use.file.based.index")) {
IndexLongKeyProvider.getInstance()
}
else {
IntegerStorageKeyProvider.getInstance()
}
return IntegerStorageKeyProvider.getInstance()
}
override fun isEnabled(): Boolean {

View File

@@ -0,0 +1,24 @@
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.platform.ml.embeddings.indexer.configuration
import com.intellij.openapi.util.registry.Registry
import com.intellij.platform.ml.embeddings.indexer.IndexId
import com.intellij.platform.ml.embeddings.indexer.keys.EmbeddingStorageKeyProvider
import com.intellij.platform.ml.embeddings.indexer.keys.IndexLongKeyProvider
import com.intellij.platform.ml.embeddings.indexer.storage.NativeServerTextEmbeddingsStorageManager
import com.intellij.platform.ml.embeddings.indexer.storage.TextEmbeddingsStorageManager
/**
 * [EmbeddingsConfiguration] that pairs [NativeServerTextEmbeddingsStorageManager] with keys supplied by
 * [IndexLongKeyProvider].
 *
 * It is enabled only when both the `intellij.platform.ml.embeddings.use.native.server` and the
 * `intellij.platform.ml.embeddings.use.file.based.index` registry flags are on.
 */
class NativeServerFileBasedIndexEmbeddingsConfiguration : EmbeddingsConfiguration<Long> {
  /** Returns the [NativeServerTextEmbeddingsStorageManager] obtained from its `getInstance()`. */
  override fun getStorageManager(): TextEmbeddingsStorageManager<Long> =
    NativeServerTextEmbeddingsStorageManager.getInstance()

  /** Returns the [IndexLongKeyProvider] obtained from its `getInstance()`. */
  override fun getKeyProvider(): EmbeddingStorageKeyProvider<Long> = IndexLongKeyProvider.getInstance()

  /** Returns `true` only when both the native-server and the file-based-index registry flags are set. */
  override fun isEnabled(): Boolean =
    // `Registry.is` is called directly, as in NativeServerEmbeddingsConfiguration; the explicit
    // `Registry.Companion` qualifier is redundant.
    Registry.`is`("intellij.platform.ml.embeddings.use.native.server") &&
    Registry.`is`("intellij.platform.ml.embeddings.use.file.based.index")
}

View File

@@ -15,6 +15,7 @@ import com.intellij.platform.ml.embeddings.indexer.entities.IndexableEntity
import com.intellij.platform.ml.embeddings.indexer.entities.LongIndexableEntity
import com.intellij.psi.search.GlobalSearchScope
import com.intellij.util.indexing.FileBasedIndex
import com.intellij.util.indexing.ID
@Service(Service.Level.APP)
internal class IndexLongKeyProvider : EmbeddingStorageKeyProvider<Long> {
@@ -34,12 +35,7 @@ internal class IndexLongKeyProvider : EmbeddingStorageKeyProvider<Long> {
var result = ""
val index = when (indexId) {
IndexId.ACTIONS -> return "" // todo
IndexId.FILES -> FILE_NAME_EMBEDDING_INDEX_NAME
IndexId.CLASSES -> CLASS_NAME_EMBEDDING_INDEX_NAME
IndexId.SYMBOLS -> SYMBOL_NAME_EMBEDDING_INDEX_NAME
}
val index = getEmbeddingIndexId(indexId) ?: throw IllegalArgumentException("$indexId request is not supported")
smartReadAction(project) {
FileBasedIndex.getInstance().processValues(
@@ -52,4 +48,14 @@ internal class IndexLongKeyProvider : EmbeddingStorageKeyProvider<Long> {
return result
}
}
/**
 * Maps [indexId] to the ID of the file-based embedding index used for it.
 *
 * @return the index ID for [IndexId.FILES], [IndexId.CLASSES] and [IndexId.SYMBOLS]; `null` for any other value.
 */
private fun getEmbeddingIndexId(indexId: IndexId): ID<EmbeddingKey, String>? = when (indexId) {
  IndexId.FILES -> FILE_NAME_EMBEDDING_INDEX_NAME
  IndexId.CLASSES -> CLASS_NAME_EMBEDDING_INDEX_NAME
  IndexId.SYMBOLS -> SYMBOL_NAME_EMBEDDING_INDEX_NAME
  else -> null
}
}

View File

@@ -8,10 +8,11 @@ import com.intellij.platform.ml.embeddings.indexer.keys.EmbeddingStorageKeyProvi
import com.intellij.platform.ml.embeddings.utils.ScoredText
class EmbeddingsStorageManagerWrapper<KeyT>(
private val indexId: IndexId,
private val storageManager: TextEmbeddingsStorageManager<KeyT>,
private val keyProvider: EmbeddingStorageKeyProvider<KeyT>,
) {
suspend fun addAbsent(project: Project, indexId: IndexId, entities: List<IndexableEntity>) {
suspend fun addAbsent(project: Project, entities: List<IndexableEntity>) {
return storageManager.addAbsent(project, indexId, entities.map {
IndexEntry(
keyProvider.findKey(project, indexId, it),
@@ -21,8 +22,10 @@ class EmbeddingsStorageManagerWrapper<KeyT>(
}
suspend fun search(
project: Project, indexId: IndexId,
query: String, limit: Int, similarityThreshold: Float? = null,
project: Project,
query: String,
limit: Int,
similarityThreshold: Float? = null,
): List<ScoredText> {
val result = storageManager.search(project, indexId, query, limit, similarityThreshold)
.map { (id, similarity) ->
@@ -32,11 +35,11 @@ class EmbeddingsStorageManagerWrapper<KeyT>(
return result
}
suspend fun startIndexingSession(project: Project, indexId: IndexId) {
suspend fun startIndexingSession(project: Project) {
storageManager.startIndexingSession(project, indexId)
}
suspend fun finishIndexingSession(project: Project, indexId: IndexId) {
suspend fun finishIndexingSession(project: Project) {
storageManager.finishIndexingSession(project, indexId)
}

View File

@@ -26,8 +26,8 @@ abstract class SemanticPsiItemsProvider(val project: Project) : StreamSemanticIt
/**
 * Searches the embeddings index identified by `indexId` for entries similar to [pattern].
 *
 * The pattern is passed through `convertNameToNaturalLanguage` before the search, and `itemLimit` is
 * passed as the result limit.
 *
 * @param pattern the search text; a blank pattern yields an empty list without querying the storage.
 * @param similarityThreshold optional threshold, converted to `Float` and passed on to the wrapper's search.
 * @return the scored matches returned by the storage manager wrapper.
 */
override suspend fun search(pattern: String, similarityThreshold: Double?): List<ScoredText> {
if (pattern.isBlank()) return emptyList()
return EmbeddingsConfiguration.getStorageManagerWrapper()
.search(project, indexId, convertNameToNaturalLanguage(pattern), itemLimit, similarityThreshold?.toFloat())
return EmbeddingsConfiguration
.getStorageManagerWrapper(indexId).search(project, convertNameToNaturalLanguage(pattern), itemLimit, similarityThreshold?.toFloat())
}
override suspend fun streamSearch(