IJPL-149806 Sort File Type mappings from most

specific to least specific one

The sorting order is:

  * longer patterns are treated as more specific than shorter ones, so longer patterns are sorted in first,
  * if patterns have the same length, then strings are compared lexicographically from left to right,
    with the exception of ? and * (they get treated, respectively, as the next to least possible character
    and least possible character).

GitOrigin-RevId: a1ba32b4f95845f308878c3ac9805e3e93fc59a1
This commit is contained in:
=?UTF-8?q?Tomek=20=22jaen=22=20Ma=C5=84ko?=
2024-07-23 15:25:09 +02:00
committed by intellij-monorepo-bot
parent ec501b1899
commit 1446deae21
2 changed files with 151 additions and 15 deletions

View File

@@ -19,6 +19,7 @@ import java.util.stream.Collectors;
@ApiStatus.Internal
public final class FileTypeAssocTable<T> {
private final Map<CharSequence, T> myExtensionMappings;
private final Map<CharSequence, T> myExactFileNameMappings;
private final Map<CharSequence, T> myExactFileNameAnyCaseMappings;
@@ -28,14 +29,15 @@ public final class FileTypeAssocTable<T> {
@FunctionalInterface
public interface ConcurrentCharSequenceMapBuilder<T> {
@NotNull Map<CharSequence, T> build(@NotNull Map<? extends CharSequence, ? extends T> source, boolean caseSensitive);
@NotNull
Map<CharSequence, T> build(@NotNull Map<? extends CharSequence, ? extends T> source, boolean caseSensitive);
}
private FileTypeAssocTable(@NotNull Map<? extends CharSequence, ? extends T> extensionMappings,
@NotNull Map<? extends CharSequence, ? extends T> exactFileNameMappings,
@NotNull Map<? extends CharSequence, ? extends T> exactFileNameAnyCaseMappings,
@NotNull ConcurrentCharSequenceMapBuilder<T> concurrentCharSequenceMapBuilder,
@NotNull Map<? extends String, ? extends T> hashBangMap,
@NotNull Map<String, ? extends T> hashBangMap,
@NotNull List<? extends Pair<FileNameMatcher, T>> matchingMappings) {
myExtensionMappings = concurrentCharSequenceMapBuilder.build(extensionMappings, false);
myExactFileNameMappings = concurrentCharSequenceMapBuilder.build(exactFileNameMappings, true);
@@ -78,25 +80,41 @@ public final class FileTypeAssocTable<T> {
String extension = ((ExtensionFileNameMatcher)matcher).getExtension();
return myExtensionMappings.put(extension, type);
}
if (matcher instanceof ExactFileNameMatcher) {
ExactFileNameMatcher exactFileNameMatcher = (ExactFileNameMatcher)matcher;
Map<CharSequence, T> mapToUse = exactFileNameMatcher.isIgnoreCase() ? myExactFileNameAnyCaseMappings : myExactFileNameMappings;
return mapToUse.put(exactFileNameMatcher.getFileName(), type);
}
int i = ContainerUtil.indexOf(myMatchingMappings, p -> p.first.equals(matcher));
if (i == -1) {
myMatchingMappings.add(Pair.create(matcher, type));
return null;
Pair<FileNameMatcher, T> previousAssociation = null;
int previousAssociationIndex = ContainerUtil.indexOf(myMatchingMappings, a -> a.first.equals(matcher));
if (previousAssociationIndex >= 0) {
previousAssociation = myMatchingMappings.get(previousAssociationIndex);
myMatchingMappings.set(previousAssociationIndex, Pair.create(matcher, type));
}
Pair<FileNameMatcher, T> old = myMatchingMappings.get(i);
myMatchingMappings.set(i, Pair.create(matcher, type));
return Pair.getSecond(old);
else {
myMatchingMappings.add(Pair.create(matcher, type));
}
// A comparator for pattern specificity added to resolve IJPL-149806. See the ticket and tests for this class for more details.
var mostSpecificPatternFirstComparator = Comparator
.comparing((Pair<FileNameMatcher, T> pair) -> pair.first.getPresentableString().length(), Comparator.reverseOrder())
.thenComparing(pair -> pair.first.getPresentableString()
.replace("?", "\uFFFE")
.replace("*", "\uFFFF"));
Collections.sort(myMatchingMappings, mostSpecificPatternFirstComparator);
return Pair.getSecond(previousAssociation);
}
void addHashBangPattern(@NotNull String hashBang, @NotNull T type) {
myHashBangMap.put(hashBang, type);
}
void removeHashBangPattern(@NotNull String hashBang, @NotNull T type) {
myHashBangMap.remove(hashBang, type);
}
@@ -298,7 +316,8 @@ public final class FileTypeAssocTable<T> {
}
// todo drop it, when ConcurrentCollectionFactory will be available in the classpath
private static @NotNull <T> Map<CharSequence, T> createCharSequenceConcurrentMap(@NotNull Map<? extends CharSequence, ? extends T> source, boolean caseSensitive) {
private static @NotNull <T> Map<CharSequence, T> createCharSequenceConcurrentMap(@NotNull Map<? extends CharSequence, ? extends T> source,
boolean caseSensitive) {
Map<CharSequence, T> map = CollectionFactory.createCharSequenceMap(caseSensitive, source.size(), 0.5f);
map.putAll(source);
return Collections.synchronizedMap(map);
@@ -314,11 +333,11 @@ public final class FileTypeAssocTable<T> {
@Override
public String toString() {
return "FileTypeAssocTable. myExtensionMappings="+myExtensionMappings+";\n"
+"myExactFileNameMappings="+myExactFileNameMappings+";\n"
+"myExactFileNameAnyCaseMappings="+myExactFileNameAnyCaseMappings+";\n"
+"myMatchingMappings="+myMatchingMappings+";\n"
+"myHashBangMap="+myHashBangMap+";";
return "FileTypeAssocTable. myExtensionMappings=" + myExtensionMappings + ";\n"
+ "myExactFileNameMappings=" + myExactFileNameMappings + ";\n"
+ "myExactFileNameAnyCaseMappings=" + myExactFileNameAnyCaseMappings + ";\n"
+ "myMatchingMappings=" + myMatchingMappings + ";\n"
+ "myHashBangMap=" + myHashBangMap + ";";
}
@TestOnly

View File

@@ -0,0 +1,117 @@
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.fileTypes.impl;
import com.intellij.openapi.fileTypes.FileNameMatcher;
import com.intellij.openapi.util.Pair;
import com.intellij.testFramework.UsefulTestCase;
import com.intellij.util.containers.ContainerUtil;
import org.jetbrains.jps.model.fileTypes.FileNameMatcherFactory;
import java.util.List;
import static org.junit.Assert.assertNotEquals;
public class FileTypeAssocTableTest extends UsefulTestCase {
// This should still pass even without the fix for IJPL-149806, as wildcard patterns
// are always tried before extension patterns (like this single-segment one).
public void testWildcardPatternIsMoreSpecificThanExtensionPattern() {
var table = createAssocTable(
new Pair<>("*.bar", "foobar"),
new Pair<>("*.foo.bar", "foobar (more specific)")
);
assertEquals(
"foobar (more specific)",
table.findAssociatedFileType("hurrdurr.foo.bar")
);
}
// In cases where both patterns classify as wildcard patterns, the class needs
// to take their relative specificity into account when finding a match.
// We do this to resolve the aforementioned IJPL-149806 issue.
// We consider longer patterns to be more specific than shorter patterns.
public void testLongerWildcardPatternIsMoreSpecific() {
var table = createAssocTable(
new Pair<>("*.foo.bar", "foobar"),
new Pair<>("*.bonk.foo.bar", "foobar (more specific)")
);
assertEquals(
"foobar (more specific)",
table.findAssociatedFileType("hurrdurr.bonk.foo.bar")
);
}
// If the patterns are of equal length, compare them lexicographically except for * and ?,
// which we treat as — respectively — least and next-to-least-specific characters.
public void testLettersMoreSpecificThanWildcards() {
var table = createAssocTable(
new Pair<>("*.p??", "pdf"),
new Pair<>("*.p*f", "pdf (more specific)"),
new Pair<>("*.p?f", "pdf (most specific)")
);
assertEquals(
"pdf (most specific)",
table.findAssociatedFileType("ayy.pdf")
);
}
// Because we have a stable ordering of patterns — first by length, then by lexicographical
// comparison except wildcards, then by ? and then finally by * — then insertion order shouldn't
// change which pattern is recognised as more specific
public void testSpecificityDoesntDependOnInsertionOrder() {
var table1 = createAssocTable(
new Pair<>("*.p??", "pdf"),
new Pair<>("*.p*f", "pdf (more specific)"),
new Pair<>("*.p?f", "pdf (most specific)")
);
var table2 = createAssocTable(
new Pair<>("*.p?f", "pdf (most specific)"),
new Pair<>("*.p??", "pdf"),
new Pair<>("*.p*f", "pdf (more specific)")
);
assertEquals(
table1.findAssociatedFileType("ayy.pdf"),
table2.findAssociatedFileType("ayy.pdf")
);
}
// An unfortunate side-effect of this rather simple concept of pattern specificity is
// that some patterns might be counterintuitively preferred over others, such as in this
// case that is used to document the current behaviour of the class.
public void testLongerWildcardPatternIsMoreSpecific22() {
var table = createAssocTable(
new Pair<>("*.foo.cpp", "cpp"),
new Pair<>("env.foo.*", "env")
);
assertNotEquals(
"cpp",
table.findAssociatedFileType("env.foo.cpp")
);
}
@SafeVarargs
private static FileTypeAssocTable<String> createAssocTable(Pair<String, String>... associations) {
var assocTable = new FileTypeAssocTable<String>();
createAssocList(associations)
.forEach(association -> assocTable.addAssociation(association.first, association.second));
return assocTable;
}
@SafeVarargs
private static List<Pair<FileNameMatcher, String>> createAssocList(Pair<String, String>... associations) {
var factory = FileNameMatcherFactory.getInstance();
return ContainerUtil.map(
associations,
association -> new Pair<>(factory.createMatcher(association.first), association.second)
);
}
}