From e4e404ff28bf50fe705d88755f60d1271ad6c78c Mon Sep 17 00:00:00 2001 From: Ruslan Cheremin Date: Thu, 9 May 2024 15:37:14 +0200 Subject: [PATCH] [vfs] optimize ExtendibleHashMap.size() + scan through segments without allocating SegmentLayout object + more tests GitOrigin-RevId: 85dd5aec9039d6177cd11a34d9a6273d6a38fbb7 --- .../io/storages/KeyValueStoreTestBase.java | 8 +- .../DurableIntToMultiIntMapTestBase.java | 77 +++++++++++++++---- .../extendiblehashmap/SegmentLayoutTest.java | 50 ++++++++++-- .../extendiblehashmap/ExtendibleHashMap.java | 76 +++++++++++++----- 4 files changed, 169 insertions(+), 42 deletions(-) diff --git a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/KeyValueStoreTestBase.java b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/KeyValueStoreTestBase.java index 0820d5e4f9df..c304954bf256 100644 --- a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/KeyValueStoreTestBase.java +++ b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/KeyValueStoreTestBase.java @@ -60,7 +60,7 @@ public abstract class KeyValueStoreTestBase> * Converts int (substrate) into a Entry(key, value). * Property: for different input substrate (int) function must return both Key and Value different */ - private final IntFunction> keyValueSubstrateDecoder; + private final IntFunction> keyValueSubstrateDecoder; @BeforeAll static void beforeAll() { @@ -68,7 +68,7 @@ public abstract class KeyValueStoreTestBase> } - protected KeyValueStoreTestBase(@NotNull IntFunction> decoder) { keyValueSubstrateDecoder = decoder; } + protected KeyValueStoreTestBase(@NotNull IntFunction> decoder) { keyValueSubstrateDecoder = decoder; } @BeforeEach void setUp(@TempDir Path tempDir) throws IOException { @@ -194,4 +194,8 @@ public abstract class KeyValueStoreTestBase> storage = factory().open(storagePath); return storage; } + + protected Path storagePath() { + return storagePath; + } } \ No newline at end of file diff --git a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/DurableIntToMultiIntMapTestBase.java b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/DurableIntToMultiIntMapTestBase.java index a687596a422d..c1f168a48e6a 100644 --- a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/DurableIntToMultiIntMapTestBase.java +++ b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/DurableIntToMultiIntMapTestBase.java @@ -125,6 +125,41 @@ public abstract class DurableIntToMultiIntMapTestBase (v == value)); } - private static @NotNull IntOpenHashSet lookupAllValues(@NotNull DurableIntToMultiIntMap multimap, - int key) throws IOException { + protected static @NotNull IntOpenHashSet lookupAllValues(@NotNull DurableIntToMultiIntMap multimap, + int key) throws IOException { IntOpenHashSet values = new IntOpenHashSet(); multimap.lookup(key, value -> { values.add(value); @@ -271,19 +322,19 @@ public abstract class DurableIntToMultiIntMapTestBase> 32); } - private static int value(long packedKeyValue) { + protected static int value(long packedKeyValue) { return (int)packedKeyValue; } - private static long[] generateUniqueKeyValues(int size) { + protected static long[] generateUniqueKeyValues(int size) { return ThreadLocalRandom.current().longs() //generate more multi-value-keys, to better check appropriate branches: +1,+2,... is almost always // have same upper 32 bits (=key), but different lower 32 bits (=value) => this switch creates - // approximately 1/14 of keys with 2 values, and another 3/14 of keys with 5 values, and 10/14 of + // approximately 3/14 of keys with 2 values, another 1/14 of keys with 5 values, and 10/14 of // keys with a single value .flatMap(v -> switch ((int)(v % 14)) { case 13 -> LongStream.of(v, v + 1, v - 1, v + 42, v - 42); diff --git a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/SegmentLayoutTest.java b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/SegmentLayoutTest.java index 2325f10c1801..f3138f8701f3 100644 --- a/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/SegmentLayoutTest.java +++ b/platform/platform-tests/testSrc/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/SegmentLayoutTest.java @@ -1,6 +1,8 @@ // Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package com.intellij.platform.util.io.storages.intmultimaps.extendiblehashmap; +import com.intellij.platform.util.io.storages.intmultimaps.extendiblehashmap.ExtendibleHashMap.HashMapSegmentLayout; +import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -12,15 +14,31 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class SegmentLayoutTest { - private ExtendibleHashMap.HashMapSegmentLayout segmentLayout; + public static final int SEGMENT_INDEX = 1; + public static final int SEGMENT_SIZE = 1 << 16; + + private HashMapSegmentLayout segmentLayout; + private ExtendibleHashMap.BufferSource bufferSource; @BeforeEach void setUp() throws IOException { - ByteBuffer buffer = ByteBuffer.allocate(1 << 16); - segmentLayout = new ExtendibleHashMap.HashMapSegmentLayout( - (offsetInFile, length) -> buffer, - 1, - buffer.capacity() + ByteBuffer buffer = ByteBuffer.allocate(SEGMENT_SIZE); + bufferSource = new ExtendibleHashMap.BufferSource() { + @Override + public @NotNull ByteBuffer slice(long offsetInFile, int length) throws IOException { + return buffer; + } + + @Override + public int getInt(long offsetInFile) throws IOException { + //we emulate 1st segment (because first segment is header, it is special), but buffer is 0-based + return buffer.getInt((int)offsetInFile - SEGMENT_SIZE); + } + }; + segmentLayout = new HashMapSegmentLayout( + bufferSource, + SEGMENT_INDEX, + SEGMENT_SIZE ); } @@ -111,4 +129,24 @@ public class SegmentLayoutTest { new int[]{0b0011, 0b0111, 0b1011, 0b1111} ); } + + @Test + void updated_aliveEntriesCount_MustBeReadViaBothAccessors() throws IOException { + for (int aliveEntriesCount : new int[]{1, 10, 42, Integer.MAX_VALUE / 2}) { + + segmentLayout.updateAliveEntriesCount(aliveEntriesCount); + + assertEquals( + aliveEntriesCount, + segmentLayout.aliveEntriesCount(), + "Must be the value just updated in" + ); + + assertEquals( + aliveEntriesCount, + HashMapSegmentLayout.aliveEntriesCount(bufferSource, 1, SEGMENT_SIZE), + "Must be the value just updated in" + ); + } + } } diff --git a/platform/util/storages/src/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/ExtendibleHashMap.java b/platform/util/storages/src/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/ExtendibleHashMap.java index 121f8687a342..e87ffc999c60 100644 --- a/platform/util/storages/src/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/ExtendibleHashMap.java +++ b/platform/util/storages/src/com/intellij/platform/util/io/storages/intmultimaps/extendiblehashmap/ExtendibleHashMap.java @@ -79,7 +79,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { // 0) remove 'synchronized' and put synchronization on clients to decide? // 1) prune tombstones (see comment in a .split() method for details) // 2) .size() is now O(N), make it O(1) - // 3) Half-utilized segmentTable room (see HeaderLayout) + // 3) Half-utilized segmentTable room (see HeaderLayout comments) private final MMappedFileStorage storage; private transient BufferSource bufferSource; @@ -110,12 +110,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { this.storage = storage; boolean fileIsEmpty = (storage.actualFileSize() == 0); - bufferSource = (offsetInFile, length) -> { - ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer(); - int offsetInPage = storage.toOffsetInPage(offsetInFile); - return buffer.slice(offsetInPage, length) - .order(buffer.order()); - }; + bufferSource = new BufferSourceOverMMappedFileStorage(storage); header = new HeaderLayout(bufferSource, segmentSize); @@ -245,8 +240,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { int segmentsCount = header.actualSegmentsCount(); int totalEntries = 0; for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) { - HashMapSegmentLayout segment = new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize); - totalEntries += segment.aliveEntriesCount(); + totalEntries += HashMapSegmentLayout.aliveEntriesCount(bufferSource, segmentIndex, segmentSize); } return totalEntries; } @@ -257,8 +251,8 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { int segmentSize = header.segmentSize(); int segmentsCount = header.actualSegmentsCount(); for (int segmentIndex = 1; segmentIndex <= segmentsCount; segmentIndex++) { - HashMapSegmentLayout segment = new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize); - if (segment.aliveEntriesCount() > 0) { + int aliveEntriesCount = HashMapSegmentLayout.aliveEntriesCount(bufferSource, segmentIndex, segmentSize); + if (aliveEntriesCount > 0) { return false; } } @@ -771,6 +765,20 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { return segmentBuffer.getInt(LIVE_ENTRIES_COUNT_OFFSET); } + /** + * 'Inlined' version of {@code new HashMapSegmentLayout(bufferSource, segmentIndex, segmentSize).aliveEntriesCount()} + * with reduced allocations and slicing + */ + public static int aliveEntriesCount(@NotNull BufferSource bufferSource, + int segmentIndex, + int segmentSize) throws IOException { + if (segmentIndex < 1) { + throw new IllegalArgumentException("segmentIndex(=" + segmentIndex + ") must be >=1 (0-th segment is a header)"); + } + long offsetInFile = segmentIndex * (long)segmentSize; + return bufferSource.getInt(offsetInFile + LIVE_ENTRIES_COUNT_OFFSET); + } + @Override public void updateAliveEntriesCount(int aliveCount) { segmentBuffer.putInt(LIVE_ENTRIES_COUNT_OFFSET, aliveCount); @@ -868,11 +876,13 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { } } - @FunctionalInterface public interface BufferSource { @NotNull ByteBuffer slice(long offsetInFile, int length) throws IOException; + + /** == {@code slice(offsetInFile, 4).getInt()} */ + int getInt(long offsetInFile) throws IOException; } /** Abstracts data storage for open-addressing hash-table implementation */ @@ -893,15 +903,16 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { int value); } - public static final class HashMapAlgo { + private static final class HashMapAlgo { public static final int NO_VALUE = 0; private final float loadFactor; - //MAYBE RC: in-memory open-addressing hashtables usually employ loadFactor=0.5 -- to prevent excessive - // clustering and probe sequence length increasing. But for on-disk hash table it may worth to actually - // increase loadFactor up to 0.6-0.7 -- because less IO due to more compact representation may easily - // outweigh more probing. + //MAYBE RC: load factor and probing algorithm choice: in-memory open-addressing hash tables usually employ + // loadFactor=0.5 -- to prevent excessive clustering and probe sequence length increasing. For multi-valued + // maps this may be even down to 0.4 due to more excessive clustering (see comments in Int2IntMultimap). + // But for on-disk hash table it may worth to actually increase loadFactor up to 0.6-0.7 -- because less + // IO due to more compact representation may easily outweigh more probing. // Tuning probing sequence could be also a thing: currently we use linear probing, since it is the // fastest, and also has spatial locality -- consequent probes are one-after-another, which is beneficial // given probing is done over IO-backed storage. But linear probing is also more susceptible to clustering, @@ -919,7 +930,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { // (key: NO_VALUE, value != NO_VALUE) = 'tombstone', i.e. deleted slot (key-value pair was inserted and removed) - public HashMapAlgo(float loadFactor) { + private HashMapAlgo(float loadFactor) { this.loadFactor = loadFactor; } @@ -1017,9 +1028,9 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { // not very effective if (aliveValues(table) == 0) { //If there is 0 alive records => it is OK to clear all the tombstones. - // We can't clear all tombstones while there alive entries because such cleaning breaks lookup: we treat + // We can't clear all tombstones while alive entries exist because such a cleaning breaks lookup: we treat // free slots and tombstones differently during the probing -- continue to probe over tombstones, but stop - // on free slots. Converting tombstone to free slot could stop probing earlier than it should stop, thus + // on free slots. Converting tombstone to free slot could stop the probing earlier than it should stop, thus // making some existing entries unreachable. // But if there are no alive entries anymore -- we finally _can_ clear everything without breaking anything! @@ -1036,7 +1047,7 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { //Table must be resized well before such a condition occurs! throw new AssertionError( - "Table is full: all " + capacity + " items were traversed, but no free slot found" + + "Table is full: all " + capacity + " items were traversed, but no free slot found " + "table.aliveEntries: " + table.aliveEntriesCount() + ", table: " + table ); } @@ -1180,4 +1191,27 @@ public class ExtendibleHashMap implements DurableIntToMultiIntMap, Unmappable { } } } + + private record BufferSourceOverMMappedFileStorage(@NotNull MMappedFileStorage storage) implements BufferSource { + @Override + public @NotNull ByteBuffer slice(long offsetInFile, + int length) throws IOException { + ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer(); + int offsetInPage = storage.toOffsetInPage(offsetInFile); + return buffer.slice(offsetInPage, length) + .order(buffer.order()); + } + + @Override + public int getInt(long offsetInFile) throws IOException { + ByteBuffer buffer = storage.pageByOffset(offsetInFile).rawPageBuffer(); + int offsetInPage = storage.toOffsetInPage(offsetInFile); + return buffer.getInt(offsetInPage); + } + + @Override + public String toString() { + return "BufferSourceOverMMappedFileStorage{" + storage + '}'; + } + } }