[indexes] IJPL-181002, IDEA-327594: re-designed input filtering for IdIndex

+ if `IdIndexer` (`IdIndex` sub-indexer) implements `FileTypeIndexingHint` -- it is used by `IdIndexFilter` to customize input files filtering
+ this feature is used to re-implement `index.ids.from.java.sources.in.jar` (instead of extension points)
+ test for `JavaIdIndexer` filtering
+ adjust `IdIndex`: processes .class-files only conditionally

+ rename registry keys (unify naming):
+ `ide.index.id.skip.java.sources.in.libs`: enables/disables indexing of java sources in libraries (index .class-files instead)
+ `ide.index.trigram.enable.exclude.extensions`: enable/disable excluding of .java-sources in libraries from trigram index

GitOrigin-RevId: 8b8e40d621e01466854364c282d00a4731d5f4de
This commit is contained in:
Ruslan Cheremin
2025-04-14 18:53:10 +02:00
committed by intellij-monorepo-bot
parent fef82ea8ae
commit ad74c6be73
11 changed files with 183 additions and 77 deletions

View File

@@ -327,8 +327,8 @@
<registryKey key="java.highest.language.level" defaultValue="24"
description="Highest released language level" restartRequired="false"/>
<registryKey key="index.ids.from.java.sources.in.jar" defaultValue="true"
description="Index ids from .class-files instead of .java-files in JARs" restartRequired="true"/>
<registryKey key="ide.index.id.skip.java.sources.in.libs" defaultValue="false"
description="IdIndex skips .java-files in libraries (index .class-files instead)" restartRequired="true"/>
<registryKey key="ide.jps.use.build.tool.window" defaultValue="true"
description="Enables 'Build' toolwindow for JPS compilation messages"/>
@@ -2750,7 +2750,6 @@
implementationClass="com.intellij.codeInsight.generation.analysis.GenerateLoggerStatisticsCollector"/>
<trigramIndexFilterExcludeExtension implementation="com.intellij.java.JavaLibrarySourcesExcludingIndexFilterExtension"/>
<idIndexFilterExcludeExtension implementation="com.intellij.java.JavaLibrarySourcesExcludingIndexFilterExtension"/>
<registryKey key="java.completion.argument.live.template" defaultValue="false"
description="When completing a method call, start a live template with all arguments"/>

View File

@@ -10,7 +10,7 @@ import com.intellij.util.indexing.IndexedFile
/**
* Extension for [com.intellij.util.indexing.FileBasedIndex.InputFilter]: excludes .java-sources located in libraries from indexing.
* Used e.g. in [TrigramIndex] and [IdIndex]
* Used e.g. in [TrigramIndex]
*/
internal class JavaLibrarySourcesExcludingIndexFilterExtension: IndexFilterExcludingExtension {
override fun getFileType(): FileType = JavaFileType.INSTANCE

View File

@@ -6,10 +6,18 @@ import com.intellij.openapi.util.registry.RegistryValueListener
import com.intellij.psi.impl.cache.impl.id.IdIndex
import com.intellij.util.indexing.FileBasedIndex
class JavaIdIndexRegistryValueListener : RegistryValueListener {
/**
* Forces [IdIndex] re-indexing after [JavaIdIndexer.SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY] registry value is changed.
*
* The actual index(ing) behavior will change **only after IDE restart**, because changing the indexing filter on top of
* already existing index is hard to make sound -- basically, the simplest method to make index consistent in this case
* is to just rebuild it entirely, which is, in turn, safer to do on IDE restart.
* This is why [JavaIdIndexer.skipSourceFilesInLibraries] is intentionally made immutable, and the registry key is tagged
* with `restartRequired`.
*/
internal class JavaIdIndexRegistryValueListener : RegistryValueListener {
override fun afterValueChanged(value: RegistryValue) {
if (JavaIdIndexer.INDEX_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY == value.key) {
JavaIdIndexer.indexSourceFilesInLibraries = value.asBoolean()
if (JavaIdIndexer.SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY == value.key) {
FileBasedIndex.getInstance().requestRebuild(IdIndex.NAME)
}
}

View File

@@ -2,6 +2,7 @@
package com.intellij.psi.impl.cache.impl.idCache;
import com.intellij.ide.highlighter.JavaClassFileType;
import com.intellij.ide.highlighter.JavaFileType;
import com.intellij.lang.java.JavaParserDefinition;
import com.intellij.lexer.Lexer;
import com.intellij.openapi.diagnostic.Logger;
@@ -16,7 +17,10 @@ import com.intellij.psi.impl.cache.impl.id.IdIndexEntry;
import com.intellij.psi.impl.cache.impl.id.IdIndexer;
import com.intellij.psi.impl.source.JavaFileElementType;
import com.intellij.psi.search.UsageSearchContext;
import com.intellij.util.ThreeState;
import com.intellij.util.indexing.FileContent;
import com.intellij.util.indexing.IndexedFile;
import com.intellij.util.indexing.hints.FileTypeIndexingHint;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import org.jetbrains.annotations.NotNull;
@@ -30,17 +34,70 @@ import java.util.Map;
import static com.intellij.psi.impl.cache.impl.BaseFilterLexerUtil.scanContentWithCheckCanceled;
/** @see com.intellij.psi.impl.cache.impl.id.IdIndexFilter */
public final class JavaIdIndexer implements IdIndexer {
public final class JavaIdIndexer implements IdIndexer, FileTypeIndexingHint {
private static final Logger LOG = Logger.getInstance(JavaIdIndexer.class);
static final String INDEX_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY = "index.ids.from.java.sources.in.jar";
//BEWARE: there are 2 instances of this class in a container, because it is registered twice: for .java and .class file types
static final String SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY = "ide.index.id.skip.java.sources.in.libs";
/**
* if true -> .java-files in libraries are indexed
* if false -> .class-files in libraries should be indexed instead, while .java-sources in libraries should be skipped
* Option for optimization:
* if true -> .class-files in libraries are indexed, while .java-sources in libraries should be skipped
* if false -> .java-files in libraries are indexed
*/
//MAYBE RC: it is better to have it through system-properties, since we need to trigger re-indexing, if the property is
// changed -- so Registry is not a good choice?
static volatile boolean indexSourceFilesInLibraries = Registry.is(INDEX_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY, false);
private final boolean skipSourceFilesInLibraries;
/**
 * Creates an indexer whose 'skip .java-sources in libraries' option is taken from the
 * {@link #SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY} registry key at construction time.
 * The second argument ({@code false}) is presumably the value used when the key is not available -- TODO confirm.
 */
public JavaIdIndexer() {
this(Registry.is(SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY, false));
}
public JavaIdIndexer(boolean skipSourceFilesInLibraries) {
this.skipSourceFilesInLibraries = skipSourceFilesInLibraries;
LOG.info("skipSourceFilesInLibraries: " + skipSourceFilesInLibraries
+ " (registry: '" + SKIP_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY + "')");
}
/**
 * File-type-only stage of the input filtering for this indexer.
 * <p>
 * If {@code skipSourceFilesInLibraries} is set, both .java- and .class-files answer {@link ThreeState#UNSURE}, because
 * the decision needs details about the file location. Otherwise .java-files are always accepted and .class-files are
 * always rejected. Any other file type is rejected.
 */
@Override
public @NotNull ThreeState acceptsFileTypeFastPath(@NotNull FileType fileType) {
  boolean isClassFile = fileType.equals(JavaClassFileType.INSTANCE);
  boolean isJavaFile = !isClassFile && fileType.equals(JavaFileType.INSTANCE);
  if (!isClassFile && !isJavaFile) {
    return ThreeState.NO;
  }

  if (skipSourceFilesInLibraries) {
    //need details to check if the file is in 'libraries'
    return ThreeState.UNSURE;
  }

  //if we skip .java-files in libraries => we must at least index .class-files,
  // but if we don't skip .java-files => we don't need to index .class-files
  //(In theory, we could go further, and mix .class and .java files indexing -- use one or another depending on availability.
  // But it is harder to implement correctly, so currently we don't bother)
  return isJavaFile ? ThreeState.YES : ThreeState.NO;
}
/**
 * Per-file stage of the input filtering.
 * <ul>
 *   <li>.class-file: accepted iff {@code skipSourceFilesInLibraries} is set (its location is not checked, see TODO below)</li>
 *   <li>.java-file: accepted if {@code skipSourceFilesInLibraries} is not set, otherwise only if
 *       {@code isJavaInSourceTree(file)} is true</li>
 *   <li>any other file type: rejected</li>
 * </ul>
 */
@Override
public boolean slowPathIfFileTypeHintUnsure(@NotNull IndexedFile inputData) {
  VirtualFile candidate = inputData.getFile();
  FileType candidateType = candidate.getFileType();

  if (candidateType.equals(JavaClassFileType.INSTANCE)) {
    //TODO RC: Currently there is no regular way to find out whether a .class-file is in libs or not.
    //         For .java-files we use a hack (see JavaFileElementType.isInSourceContent()), but there is no such hack for
    //         .class-files => we behave as if '.class is always in libraries'
    return skipSourceFilesInLibraries; // && isClassInLibraries(candidate);
  }

  if (!candidateType.equals(JavaFileType.INSTANCE)) {
    return false;//really, we shouldn't come here
  }

  if (skipSourceFilesInLibraries) {
    return isJavaInSourceTree(candidate);
  }
  return true;
}
@Override
public @NotNull Map<IdIndexEntry, Integer> map(@NotNull FileContent inputData) {
@@ -48,22 +105,28 @@ public final class JavaIdIndexer implements IdIndexer {
FileType fileType = file.getFileType();
if (fileType.equals(JavaClassFileType.INSTANCE)) {
if (!indexSourceFilesInLibraries) {
if (skipSourceFilesInLibraries) { //don't check isInLibraries(): filter must filter out .class-files that are not in libraries
Map<IdIndexEntry, Integer> idEntries = calculateIdEntriesParsingConstantPool(inputData);
if (idEntries != null) {
return idEntries;
}
}
//MAYBE RC: why skip indexing .class-files if source-files indexing is enabled?
// Even if .java-files indexing in libraries is enabled, there may be no .java-files in a particular library
// => the .class-file is the only option then.
return Map.of();
}
// we are skipping indexing of sources in libraries (we are going to index only the compiled library classes)
if (indexSourceFilesInLibraries || JavaFileElementType.isInSourceContent(file)) {
IdDataConsumer consumer = new IdDataConsumer();
scanContentWithCheckCanceled(inputData, createIndexingLexer(new OccurrenceConsumer(consumer, /*needToDo:*/ false)));
return consumer.getResult();
if (fileType.equals(JavaFileType.INSTANCE)) {
if (!skipSourceFilesInLibraries || isJavaInSourceTree(file)) {
IdDataConsumer idCollector = new IdDataConsumer();
scanContentWithCheckCanceled(inputData, createIndexingLexer(new OccurrenceConsumer(idCollector, /*needToDo:*/ false)));
return idCollector.getResult();
}
return Map.of();
}
return Map.of();
return Map.of();//really, we shouldn't come here
}
private static @Nullable Map<IdIndexEntry, Integer> calculateIdEntriesParsingConstantPool(@NotNull FileContent inputData) {
@@ -102,4 +165,9 @@ public final class JavaIdIndexer implements IdIndexer {
Lexer javaLexer = JavaParserDefinition.createLexer(LanguageLevel.JDK_1_3);
return new JavaFilterLexer(javaLexer, consumer);
}
/**
 * Thin wrapper over {@code JavaFileElementType.isInSourceContent(file)}, giving the check a descriptive name at call sites.
 *
 * @return true if the .java-file is located under one of project's source trees, false otherwise.
 */
private static boolean isJavaInSourceTree(@NotNull VirtualFile file) {
return JavaFileElementType.isInSourceContent(file);
}
}

View File

@@ -0,0 +1,37 @@
// Copyright 2000-2025 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.psi.impl.cache.impl.idCache;
import com.intellij.ide.highlighter.JavaClassFileType;
import com.intellij.ide.highlighter.JavaFileType;
import com.intellij.util.ThreeState;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Checks the file-type-only ('fast path') filtering of {@link JavaIdIndexer} for both values of the
 * {@code skipSourceFilesInLibraries} constructor argument.
 */
class JavaIdIndexerTest {

  /** .java-files: {@code YES} if sources are not skipped, {@code UNSURE} otherwise. */
  @ParameterizedTest(name = "skipSourceIndexing={0}")
  @ValueSource(booleans = {true, false})
  void acceptsJavaSourceFastPath(boolean skipSourceIndexing) {
    JavaIdIndexer javaIdIndexer = new JavaIdIndexer(skipSourceIndexing);

    ThreeState expected = skipSourceIndexing ? ThreeState.UNSURE : ThreeState.YES;
    ThreeState actual = javaIdIndexer.acceptsFileTypeFastPath(JavaFileType.INSTANCE);

    assertEquals(
      expected,
      actual,
      "JavaIdIndexer must index all .java-files if skipSourceFilesInLibraries=false, and must be unsure (depends on file location) otherwise"
    );
  }

  /** .class-files: {@code NO} if sources are not skipped, {@code UNSURE} otherwise. */
  @ParameterizedTest(name = "skipSourceIndexing={0}")
  @ValueSource(booleans = {true, false})
  void acceptsJavaClassFastPath(boolean skipSourceIndexing) {
    JavaIdIndexer javaIdIndexer = new JavaIdIndexer(skipSourceIndexing);

    ThreeState expected = skipSourceIndexing ? ThreeState.UNSURE : ThreeState.NO;
    ThreeState actual = javaIdIndexer.acceptsFileTypeFastPath(JavaClassFileType.INSTANCE);

    assertEquals(
      expected,
      actual,
      "JavaIdIndexer must NOT index .class-files if skipSourceFilesInLibraries=false, and must be unsure (depends on file location) otherwise"
    );
  }
}

View File

@@ -25,7 +25,6 @@
<extensionPoint name="binaryFileSourceProvider" interface="com.intellij.platform.indexing.BinaryFileSourceProvider" dynamic="true"/>
<extensionPoint name="trigramIndexFilterExcludeExtension" interface="com.intellij.util.indexing.IndexFilterExcludingExtension" dynamic="true"/>
<extensionPoint name="idIndexFilterExcludeExtension" interface="com.intellij.util.indexing.IndexFilterExcludingExtension" dynamic="true"/>
<extensionPoint name="stubElementRegistryExtension"
interface="com.intellij.psi.stubs.StubRegistryExtension"

View File

@@ -1,4 +1,4 @@
// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2025 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.psi.impl.cache.impl;
import com.intellij.lexer.Lexer;
@@ -57,7 +57,7 @@ public final class BaseFilterLexerUtil {
filterLexer.start(content.getContentAsText());
int tokenIdx = 0;
while (filterLexer.getTokenType() != null) {
if (tokenIdx++ % 100 == 0) {
if (tokenIdx++ % 128 == 0) {
ProgressManager.checkCanceled();
}
filterLexer.advance();

View File

@@ -1,70 +1,44 @@
// Copyright 2000-2025 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.psi.impl.cache.impl.id
import com.intellij.openapi.diagnostic.logger
import com.intellij.openapi.extensions.ExtensionPointName
import com.intellij.openapi.fileTypes.FileType
import com.intellij.openapi.fileTypes.LanguageFileType
import com.intellij.openapi.fileTypes.PlainTextFileType
import com.intellij.openapi.util.registry.Registry
import com.intellij.util.ThreeState
import com.intellij.util.indexing.CustomizableExcludeExtensions
import com.intellij.util.indexing.FileBasedIndex
import com.intellij.util.indexing.IndexFilterExcludingExtension
import com.intellij.util.indexing.IndexedFile
import com.intellij.util.indexing.hints.BaseFileTypeInputFilter
import com.intellij.util.indexing.hints.FileTypeIndexingHint
import com.intellij.util.indexing.hints.FileTypeSubstitutionStrategy
/**
* 'Smart' file-filter for {@link IdIndex}: allows extending filtering patterns with {@link IndexFilterExcludingExtension}.
* The current use of it: exclude .java source-files in libraries (index .class-files instead).
*
* @see IndexFilterExcludingExtension
*/
internal class IdIndexFilter : BaseFileTypeInputFilter(FileTypeSubstitutionStrategy.BEFORE_SUBSTITUTION) {
private companion object {
//TODO RC: extension is currently used to support JavaIdIndexer ability to index .class-files from libraries instead of .java
// source files (IDEA-327594, IJPL-181002).
// This was the simplest option to implement the input files filtering required, but it is not a very good design --
// inflexible and unnatural -- because single specific sub-indexer (JavaIdIndexer) influences IdIndexFilter which is
// common for all sub-indexers. This is why we need to duplicate 'index.ids.from.java.sources.in.jar' registry key here,
// -- we can't just reference JavaIdIndexer.INDEX_SOURCE_FILES_IN_LIBRARIES_REGISTRY_KEY
// A more logical approach is to give _each_ sub-indexer a way to contribute to the input filter. We're already close to
// this: we use IdTableBuilding.getFileTypeIndexer(fileType) in the filtering. We just need DataIndexer/IdIndexer to
// implement some kind of BaseFileTypeInputFilter, which could be used in both .acceptFileType() and .slowPathIfFileTypeHintUnsure().
// If >1 sub-indexer follows the same pattern as JavaIdIndexer => we should definitely shift to this approach.
val ENABLE_EXTENSION_EXCLUDES = !Registry.`is`("index.ids.from.java.sources.in.jar", false)
val EXTENSION_EXCLUDES: CustomizableExcludeExtensions = CustomizableExcludeExtensions(
ExtensionPointName.create("com.intellij.idIndexFilterExcludeExtension")
)
}
init{
logger<IdIndexFilter>().info(
"Filter extensions is ${if (ENABLE_EXTENSION_EXCLUDES) "enabled: $EXTENSION_EXCLUDES" else "disabled"}"
)
}
override fun acceptFileType(fileType: FileType): ThreeState {
return when {
fileType is PlainTextFileType -> ThreeState.fromBoolean(!FileBasedIndex.IGNORE_PLAIN_TEXT_FILES)
//'.class' fileType is also handled here:
val fileTypeIndexer = IdTableBuilding.getFileTypeIndexer(fileType)
ENABLE_EXTENSION_EXCLUDES && EXTENSION_EXCLUDES.hasExtensionForFileType(fileType) -> ThreeState.UNSURE //go through slowPathIfFileTypeHintUnsure()
fileType is LanguageFileType -> ThreeState.YES
//'.class' fileType is also handled here:
IdTableBuilding.getFileTypeIndexer(fileType) != null -> ThreeState.YES
else -> ThreeState.NO
if (fileTypeIndexer == null) {
return ThreeState.NO
}
if (fileTypeIndexer is FileTypeIndexingHint) {
//give subIndexer a chance to override default filtering
return fileTypeIndexer.acceptsFileTypeFastPath(fileType)
}
return ThreeState.YES
}
override fun slowPathIfFileTypeHintUnsure(file: IndexedFile): Boolean {
check(ENABLE_EXTENSION_EXCLUDES) { "ENABLE_EXTENSION_EXCLUDES must be true to reach this point" }
override fun slowPathIfFileTypeHintUnsure(indexedFile: IndexedFile): Boolean {
val file = indexedFile.file
val fileType = file.fileType
return !EXTENSION_EXCLUDES.shouldExcludeFile(file)
//'.class' fileType is also handled here:
val fileTypeIndexer = IdTableBuilding.getFileTypeIndexer(fileType)
check(fileTypeIndexer != null) { "slowPathIfFileTypeHintUnsure($indexedFile): must be called only with filetypes which have associated fileTypeIndexer" }
check(fileTypeIndexer is FileTypeIndexingHint) { "slowPathIfFileTypeHintUnsure($indexedFile): $fileTypeIndexer must implement FileTypeIndexingHint" }
return fileTypeIndexer.slowPathIfFileTypeHintUnsure(indexedFile)
}
}

View File

@@ -1,13 +1,19 @@
// Copyright 2000-2017 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2025 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.psi.impl.cache.impl.id;
import com.intellij.util.indexing.DataIndexer;
import com.intellij.util.indexing.FileContent;
import com.intellij.util.indexing.hints.FileTypeIndexingHint;
import org.jetbrains.annotations.ApiStatus;
/**
* Id index is used in the reference searcher and other API that require "plain text" search by the identifier.
* Please note that for "find in path" search usually {@link com.intellij.find.ngrams.TrigramIndex} is used instead
*
* An implementation _may_ also implement {@link FileTypeIndexingHint} to customize input files filtering -- i.e. to eagerly
* filter out files that should not be indexed.
* If an implementation does not implement {@link FileTypeIndexingHint}, then all the files of the type this indexer is
* registered for will be indexed.
*
* @author traff
* @see IdIndex

View File

@@ -1,11 +1,15 @@
// Copyright 2000-2025 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.psi.impl.cache.impl.id;
import com.intellij.openapi.fileTypes.FileType;
import com.intellij.openapi.util.Key;
import com.intellij.psi.impl.cache.impl.todo.TodoIndexers;
import com.intellij.psi.search.UsageSearchContext;
import com.intellij.util.ThreeState;
import com.intellij.util.indexing.FileBasedIndex;
import com.intellij.util.indexing.FileContent;
import com.intellij.util.indexing.IndexedFile;
import com.intellij.util.indexing.hints.FileTypeIndexingHint;
import org.jetbrains.annotations.ApiStatus.Internal;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
@@ -13,11 +17,21 @@ import org.jetbrains.annotations.Nullable;
import java.util.Map;
@Internal
public final class PlainTextIdIndexer implements IdIndexer {
public final class PlainTextIdIndexer implements IdIndexer, FileTypeIndexingHint {
private static final Key<Map<IdIndexEntry, Integer>> ID_INDEX_DATA_KEY = Key.create("plain.text.id.index");
@Override
public @NotNull Map<IdIndexEntry, Integer> map(final @NotNull FileContent inputData) {
public @NotNull ThreeState acceptsFileTypeFastPath(@NotNull FileType fileType) {
  //the answer does not depend on the given file type: it is the negation of FileBasedIndex.IGNORE_PLAIN_TEXT_FILES
  boolean plainTextIsIndexed = !FileBasedIndex.IGNORE_PLAIN_TEXT_FILES;
  return ThreeState.fromBoolean(plainTextIsIndexed);
}
/**
 * Not expected to be called for this indexer: always throws {@link AssertionError}.
 * NOTE(review): this relies on {@code acceptsFileTypeFastPath} never answering {@code UNSURE} -- it returns
 * {@code ThreeState.fromBoolean(...)}, which presumably yields only YES/NO; confirm.
 */
@Override
public boolean slowPathIfFileTypeHintUnsure(@NotNull IndexedFile file) {
throw new AssertionError("Should never come here");
}
/** Returns the result of {@code getIdIndexData(inputData)} for the given file content. */
@Override
public @NotNull Map<IdIndexEntry, Integer> map(@NotNull FileContent inputData) {
return getIdIndexData(inputData);
}

View File

@@ -658,8 +658,9 @@
<fileBasedIndex implementation="com.intellij.psi.search.FileTypeIndexImpl"/>
<fileBasedIndex implementation="com.intellij.find.ngrams.TrigramIndex"/>
<registryKey key="ide.trigram.index.uses.exclude.extensions" defaultValue="false"
description="Don't index source files in JARs with Trigram index, use plain search instead" restartRequired="true"/>
<registryKey key="ide.index.trigram.enable.exclude.extensions" defaultValue="false"
description="TrigramIndex doesn't index .java-files in libraries (plain un-indexed search will be used to search in libraries then)"
restartRequired="true"/>
<fileBasedIndex implementation="com.intellij.psi.impl.include.FileIncludeIndex"/>