[vfs] optimize ExtendibleHashMap.size()
+ scan through segments without allocating SegmentLayout object
+ more tests

GitOrigin-RevId: 85dd5aec9039d6177cd11a34d9a6273d6a38fbb7
committed by intellij-monorepo-bot
parent b37df5fb80
commit e4e404ff28
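In short: ExtendibleHashMap.size() and isEmpty() previously constructed a HashMapSegmentLayout per segment only to read that segment's alive-entries counter; this commit adds a static HashMapSegmentLayout.aliveEntriesCount(bufferSource, segmentIndex, segmentSize) and a BufferSource.getInt(offsetInFile) accessor, so the scan reads a single int per segment without allocating layout objects or buffer slices. A minimal self-contained sketch of that pattern follows; the class, method names, and offset constant below are illustrative stand-ins, not the project's real layout.

// Simplified, self-contained illustration of the optimization -- NOT the project's real classes.
// Assumption: each segment stores its alive-entries counter as an int at a fixed offset from the
// segment start, and segment 0 is the header (hence segmentIndex starts at 1).
import java.io.IOException;
import java.nio.ByteBuffer;

final class SizeScanSketch {
  interface BufferSource {
    ByteBuffer slice(long offsetInFile, int length) throws IOException;

    /** == slice(offsetInFile, 4).getInt() -- but without creating a slice */
    int getInt(long offsetInFile) throws IOException;
  }

  static final int LIVE_ENTRIES_COUNT_OFFSET = 8; // illustrative offset, not the real layout

  /** Before: slice a per-segment buffer (or build a layout object) only to read one int. */
  static int sizeAllocating(BufferSource source, int segmentsCount, int segmentSize) throws IOException {
    int total = 0;
    for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) {
      ByteBuffer segment = source.slice(segmentIndex * (long)segmentSize, segmentSize); // allocation per segment
      total += segment.getInt(LIVE_ENTRIES_COUNT_OFFSET);
    }
    return total;
  }

  /** After: read the counter directly at (segmentIndex * segmentSize + offset), allocation-free. */
  static int sizeAllocationFree(BufferSource source, int segmentsCount, int segmentSize) throws IOException {
    int total = 0;
    for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) {
      total += source.getInt(segmentIndex * (long)segmentSize + LIVE_ENTRIES_COUNT_OFFSET);
    }
    return total;
  }
}

The trade-off is the same one the diff makes: the counter's file offset is computed inline, accepting a little duplication of layout knowledge in exchange for an allocation-free O(segments) scan.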
@@ -60,7 +60,7 @@ public abstract class KeyValueStoreTestBase<K, V, S extends KeyValueStore<K, V>>
    * Converts int (substrate) into a Entry(key, value).
    * Property: for different input substrate (int) function must return both Key and Value different
    */
-  private final IntFunction<Entry<K, V>> keyValueSubstrateDecoder;
+  private final IntFunction<? extends Entry<K, V>> keyValueSubstrateDecoder;

   @BeforeAll
   static void beforeAll() {
@@ -68,7 +68,7 @@ public abstract class KeyValueStoreTestBase<K, V, S extends KeyValueStore<K, V>>
   }


-  protected KeyValueStoreTestBase(@NotNull IntFunction<Entry<K, V>> decoder) { keyValueSubstrateDecoder = decoder; }
+  protected KeyValueStoreTestBase(@NotNull IntFunction<? extends Entry<K, V>> decoder) { keyValueSubstrateDecoder = decoder; }

   @BeforeEach
   void setUp(@TempDir Path tempDir) throws IOException {
@@ -194,4 +194,8 @@ public abstract class KeyValueStoreTestBase<K, V, S extends KeyValueStore<K, V>>
     storage = factory().open(storagePath);
     return storage;
   }
+
+  protected Path storagePath() {
+    return storagePath;
+  }
 }
@@ -125,6 +125,41 @@ public abstract class DurableIntToMultiIntMapTestBase<M extends DurableIntToMult
     assertTrue(values.contains(3), "Values for key=1 must contain 3");
   }

+  @Test
+  public void manyKeyValues_PutRemovedAndPutAgain_AreAllExistInTheMap() throws IOException {
+    long[] packedKeysValues = generateUniqueKeyValues(entriesCountToTest);
+
+    //test for entries removing-overwriting: add/remove/add again the entries from the map,
+    // and check map behave as-if only the last added entries are there
+
+    putAllEntries(multimap, packedKeysValues);
+    removeAllEntries(multimap, packedKeysValues);
+
+    assertEquals(0, multimap.size(),
+                 "Map must be empty since all entries were removed"
+    );
+
+    for (long packedKeyValue : packedKeysValues) {
+      int key = key(packedKeyValue);
+      int value = value(packedKeyValue);
+
+      boolean reallyPut = multimap.put(key, value);
+      assertTrue(reallyPut, "(" + key + ", " + value + ") must be a new entry for the map");
+    }
+
+    assertEquals(packedKeysValues.length,
+                 multimap.size(),
+                 "Map.size must be equal to the number of entries added"
+    );
+
+    for (long packedKeyValue : packedKeysValues) {
+      int key = key(packedKeyValue);
+      int value = value(packedKeyValue);
+
+      assertTrue(multimap.has(key, value),
+                 "(" + key + ", " + value + ") must exist in the map");
+    }
+  }

   @Test
   public void withManyKeyValuesPut_MultimapSizeIsEqualToNumberOfTruthReturned() throws IOException {
@@ -174,11 +209,7 @@ public abstract class DurableIntToMultiIntMapTestBase<M extends DurableIntToMult
   @Test
   public void remove_DeletesKeyValueFromTheMultimap_AndReturnsTrueIfKeyValuePreviouslyExists() throws IOException {
     long[] packedKeysValues = generateUniqueKeyValues(entriesCountToTest);
-    for (long packedKeyValue : packedKeysValues) {
-      int key = key(packedKeyValue);
-      int value = value(packedKeyValue);
-      multimap.put(key, value);
-    }
+    putAllEntries(multimap, packedKeysValues);

     for (long packedKeyValue : packedKeysValues) {
       int key = key(packedKeyValue);
@@ -256,13 +287,33 @@ public abstract class DurableIntToMultiIntMapTestBase<M extends DurableIntToMult
   /* ======================== infrastructure: ================================================================ */


-  private int lookupSingleValue(int key,
-                                int value) throws IOException {
+  protected static void putAllEntries(@NotNull DurableIntToMultiIntMap multimap,
+                                      long[] packedKeysValues) throws IOException {
+    for (long packedKeyValue : packedKeysValues) {
+      int key = key(packedKeyValue);
+      int value = value(packedKeyValue);
+
+      multimap.put(key, value);
+    }
+  }
+
+  private static void removeAllEntries(@NotNull DurableIntToMultiIntMap multimap,
+                                       long[] packedKeysValues) throws IOException {
+    for (long packedKeyValue : packedKeysValues) {
+      int key = key(packedKeyValue);
+      int value = value(packedKeyValue);
+
+      multimap.remove(key, value);
+    }
+  }
+
+  protected int lookupSingleValue(int key,
+                                  int value) throws IOException {
     return multimap.lookup(key, v -> (v == value));
   }

-  private static @NotNull IntOpenHashSet lookupAllValues(@NotNull DurableIntToMultiIntMap multimap,
-                                                         int key) throws IOException {
+  protected static @NotNull IntOpenHashSet lookupAllValues(@NotNull DurableIntToMultiIntMap multimap,
+                                                           int key) throws IOException {
     IntOpenHashSet values = new IntOpenHashSet();
     multimap.lookup(key, value -> {
       values.add(value);
@@ -271,19 +322,19 @@ public abstract class DurableIntToMultiIntMapTestBase<M extends DurableIntToMult
     return values;
   }

-  private static int key(long packedKeyValue) {
+  protected static int key(long packedKeyValue) {
     return (int)(packedKeyValue >> 32);
   }

-  private static int value(long packedKeyValue) {
+  protected static int value(long packedKeyValue) {
     return (int)packedKeyValue;
   }

-  private static long[] generateUniqueKeyValues(int size) {
+  protected static long[] generateUniqueKeyValues(int size) {
     return ThreadLocalRandom.current().longs()
       //generate more multi-value-keys, to better check appropriate branches: +1,+2,... is almost always
       // have same upper 32 bits (=key), but different lower 32 bits (=value) => this switch creates
-      // approximately 1/14 of keys with 2 values, and another 3/14 of keys with 5 values, and 10/14 of
+      // approximately 3/14 of keys with 2 values, another 1/14 of keys with 5 values, and 10/14 of
       // keys with a single value
       .flatMap(v -> switch ((int)(v % 14)) {
         case 13 -> LongStream.of(v, v + 1, v - 1, v + 42, v - 42);
@@ -1,6 +1,8 @@
 // Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 package com.intellij.platform.util.io.storages.intmultimaps.extendiblehashmap;

+import com.intellij.platform.util.io.storages.intmultimaps.extendiblehashmap.ExtendibleHashMap.HashMapSegmentLayout;
+import org.jetbrains.annotations.NotNull;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

@@ -12,15 +14,31 @@ import static org.junit.jupiter.api.Assertions.assertEquals;

 public class SegmentLayoutTest {

-  private ExtendibleHashMap.HashMapSegmentLayout segmentLayout;
+  public static final int SEGMENT_INDEX = 1;
+  public static final int SEGMENT_SIZE = 1 << 16;
+
+  private HashMapSegmentLayout segmentLayout;
+  private ExtendibleHashMap.BufferSource bufferSource;

   @BeforeEach
   void setUp() throws IOException {
-    ByteBuffer buffer = ByteBuffer.allocate(1 << 16);
-    segmentLayout = new ExtendibleHashMap.HashMapSegmentLayout(
-      (offsetInFile, length) -> buffer,
-      1,
-      buffer.capacity()
+    ByteBuffer buffer = ByteBuffer.allocate(SEGMENT_SIZE);
+    bufferSource = new ExtendibleHashMap.BufferSource() {
+      @Override
+      public @NotNull ByteBuffer slice(long offsetInFile, int length) throws IOException {
+        return buffer;
+      }
+
+      @Override
+      public int getInt(long offsetInFile) throws IOException {
+        //we emulate 1st segment (because first segment is header, it is special), but buffer is 0-based
+        return buffer.getInt((int)offsetInFile - SEGMENT_SIZE);
+      }
+    };
+    segmentLayout = new HashMapSegmentLayout(
+      bufferSource,
+      SEGMENT_INDEX,
+      SEGMENT_SIZE
     );
   }

@@ -111,4 +129,24 @@ public class SegmentLayoutTest {
       new int[]{0b0011, 0b0111, 0b1011, 0b1111}
     );
   }
+
+  @Test
+  void updated_aliveEntriesCount_MustBeReadViaBothAccessors() throws IOException {
+    for (int aliveEntriesCount : new int[]{1, 10, 42, Integer.MAX_VALUE / 2}) {
+
+      segmentLayout.updateAliveEntriesCount(aliveEntriesCount);
+
+      assertEquals(
+        aliveEntriesCount,
+        segmentLayout.aliveEntriesCount(),
+        "Must be the value just updated in"
+      );
+
+      assertEquals(
+        aliveEntriesCount,
+        HashMapSegmentLayout.aliveEntriesCount(bufferSource, 1, SEGMENT_SIZE),
+        "Must be the value just updated in"
+      );
+    }
+  }
 }
@@ -79,7 +79,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
  // 0) remove 'synchronized' and put synchronization on clients to decide?
  // 1) prune tombstones (see comment in a .split() method for details)
  // 2) .size() is now O(N), make it O(1)
- // 3) Half-utilized segmentTable room (see HeaderLayout)
+ // 3) Half-utilized segmentTable room (see HeaderLayout comments)

   private final MMappedFileStorage storage;
   private transient BufferSource bufferSource;
@@ -110,12 +110,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
     this.storage = storage;
     boolean fileIsEmpty = (storage.actualFileSize() == 0);

-    bufferSource = (offsetInFile, length) -> {
-      ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer();
-      int offsetInPage = storage.toOffsetInPage(offsetInFile);
-      return buffer.slice(offsetInPage, length)
-        .order(buffer.order());
-    };
+    bufferSource = new BufferSourceOverMMappedFileStorage(storage);

     header = new HeaderLayout(bufferSource, segmentSize);

@@ -245,8 +240,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
     int segmentsCount = header.actualSegmentsCount();
     int totalEntries = 0;
     for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) {
-      HashMapSegmentLayout segment = new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize);
-      totalEntries += segment.aliveEntriesCount();
+      totalEntries += HashMapSegmentLayout.aliveEntriesCount(bufferSource, segmentIndex, segmentSize);
     }
     return totalEntries;
   }
@@ -257,8 +251,8 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
     int segmentSize = header.segmentSize();
     int segmentsCount = header.actualSegmentsCount();
     for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) {
-      HashMapSegmentLayout segment = new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize);
-      if (segment.aliveEntriesCount() > 0) {
+      int aliveEntriesCount = HashMapSegmentLayout.aliveEntriesCount(bufferSource, segmentIndex, segmentSize);
+      if (aliveEntriesCount > 0) {
         return false;
       }
     }
@@ -771,6 +765,20 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
       return segmentBuffer.getInt(LIVE_ENTRIES_COUNT_OFFSET);
     }

+    /**
+     * 'Inlined' version of {@code new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize).aliveEntriesCount()}
+     * with reduced allocations and slicing
+     */
+    public static int aliveEntriesCount(@NotNull BufferSource bufferSource,
+                                        int segmentIndex,
+                                        int segmentSize) throws IOException {
+      if (segmentIndex < 1) {
+        throw new IllegalArgumentException("segmentIndex(=" + segmentIndex + ") must be >=1 (0-th segment is a header)");
+      }
+      long offsetInFile = segmentIndex * (long)segmentSize;
+      return bufferSource.getInt(offsetInFile + LIVE_ENTRIES_COUNT_OFFSET);
+    }
+
     @Override
     public void updateAliveEntriesCount(int aliveCount) {
       segmentBuffer.putInt(LIVE_ENTRIES_COUNT_OFFSET, aliveCount);
@@ -868,11 +876,13 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
     }
   }

-  @FunctionalInterface
   public interface BufferSource {
     @NotNull
     ByteBuffer slice(long offsetInFile,
                      int length) throws IOException;
+
+    /** == {@code slice(offsetInFile, 4).getInt()} */
+    int getInt(long offsetInFile) throws IOException;
   }

   /** Abstracts data storage for open-addressing hash-table implementation */
@@ -893,15 +903,16 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
                  int value);
   }

-  public static final class HashMapAlgo {
+  private static final class HashMapAlgo {
     public static final int NO_VALUE = 0;

     private final float loadFactor;

-    //MAYBE RC: in-memory open-addressing hashtables usually employ loadFactor=0.5 -- to prevent excessive
-    // clustering and probe sequence length increasing. But for on-disk hash table it may worth to actually
-    // increase loadFactor up to 0.6-0.7 -- because less IO due to more compact representation may easily
-    // outweigh more probing.
+    //MAYBE RC: load factor and probing algorithm choice: in-memory open-addressing hash tables usually employ
+    // loadFactor=0.5 -- to prevent excessive clustering and probe sequence length increasing. For multi-valued
+    // maps this may be even down to 0.4 due to more excessive clustering (see comments in Int2IntMultimap).
+    // But for on-disk hash table it may worth to actually increase loadFactor up to 0.6-0.7 -- because less
+    // IO due to more compact representation may easily outweigh more probing.
     // Tuning probing sequence could be also a thing: currently we use linear probing, since it is the
     // fastest, and also has spatial locality -- consequent probes are one-after-another, which is beneficial
     // given probing is done over IO-backed storage. But linear probing is also more susceptible to clustering,
@@ -919,7 +930,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
     // (key: NO_VALUE, value != NO_VALUE) = 'tombstone', i.e. deleted slot (key-value pair was inserted and removed)


-    public HashMapAlgo(float loadFactor) {
+    private HashMapAlgo(float loadFactor) {
       this.loadFactor = loadFactor;
     }

@@ -1017,9 +1028,9 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
       // not very effective
       if (aliveValues(table) == 0) {
         //If there is 0 alive records => it is OK to clear all the tombstones.
-        // We can't clear all tombstones while there alive entries because such cleaning breaks lookup: we treat
+        // We can't clear all tombstones while alive entries exist because such a cleaning breaks lookup: we treat
         // free slots and tombstones differently during the probing -- continue to probe over tombstones, but stop
-        // on free slots. Converting tombstone to free slot could stop probing earlier than it should stop, thus
+        // on free slots. Converting tombstone to free slot could stop the probing earlier than it should stop, thus
         // making some existing entries unreachable.
         // But if there are no alive entries anymore -- we finally _can_ clear everything without breaking anything!

@@ -1036,7 +1047,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {

       //Table must be resized well before such a condition occurs!
       throw new AssertionError(
-        "Table is full: all " + capacity + " items were traversed, but no free slot found" +
+        "Table is full: all " + capacity + " items were traversed, but no free slot found " +
         "table.aliveEntries: " + table.aliveEntriesCount() + ", table: " + table
       );
     }
@@ -1180,4 +1191,27 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable {
       }
     }
   }
+
+  private record BufferSourceOverMMappedFileStorage(@NotNull MMappedFileStorage storage) implements BufferSource {
+    @Override
+    public @NotNull ByteBuffer slice(long offsetInFile,
+                                     int length) throws IOException {
+      ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer();
+      int offsetInPage = storage.toOffsetInPage(offsetInFile);
+      return buffer.slice(offsetInPage, length)
+        .order(buffer.order());
+    }
+
+    @Override
+    public int getInt(long offsetInFile) throws IOException {
+      ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer();
+      int offsetInPage = storage.toOffsetInPage(offsetInFile);
+      return buffer.getInt(offsetInPage);
+    }
+
+    @Override
+    public String toString() {
+      return "BufferSourceOverMMappedFileStorage{" + storage + '}';
+    }
+  }
 }