[core] IJPL-193: moving experimental Enumerator impls into dev.enumerator package

+ new DurableEnumerator implementation prototyped

GitOrigin-RevId: 9b2937926d4d48d63ee37c438764df8189d2700c
This commit is contained in:
Ruslan Cheremin
2023-08-22 12:40:57 +02:00
committed by intellij-monorepo-bot
parent 3fabbd90c4
commit e06cf98d6a
11 changed files with 462 additions and 10 deletions

View File

@@ -6,7 +6,7 @@ import com.intellij.openapi.application.ApplicationManager;
import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.util.NotNullLazyValue;
import com.intellij.openapi.util.io.FileUtil;
import com.intellij.openapi.vfs.newvfs.persistent.dev.DurableStringEnumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator.DurableStringEnumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.blobstorage.LargeSizeStreamlinedBlobStorage;
import com.intellij.openapi.vfs.newvfs.persistent.dev.blobstorage.SpaceAllocationStrategy;
import com.intellij.openapi.vfs.newvfs.persistent.dev.blobstorage.StreamlinedBlobStorage;

View File

@@ -82,7 +82,7 @@ public class InvertedFilenameHashBasedIndex {
*/
@ApiStatus.Internal
public static class Int2IntMultimap {
protected static final int NO_VALUE = 0;
public static final int NO_VALUE = 0;
private final float loadFactor;

View File

@@ -0,0 +1,164 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.vfs.newvfs.persistent.PersistentFSRecordsLockFreeOverMMappedFile.MMappedFileStorage;
import com.intellij.openapi.vfs.newvfs.persistent.dev.appendonlylog.AppendOnlyLog;
import com.intellij.openapi.vfs.newvfs.persistent.dev.appendonlylog.AppendOnlyLogOverMMappedFile;
import com.intellij.util.Processor;
import com.intellij.util.io.ScannableDataEnumeratorEx;
import com.intellij.util.io.VersionUpdatedException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.nio.file.Path;
import java.util.function.Supplier;
/**
 * Persistent enumerator for objects.
 * 'Durable' is to separate it from {@link com.intellij.util.io.PersistentEnumerator}, which is conceptually
 * the same, but right now tightly bound to a BTree-based implementation.
 * <p>
 * Implementation uses an append-only log to store objects, and some (pluggable) Map[object.hash -> id*]:
 * the id of a key is derived from the id of its record in the log, and the map is populated from the log
 * by the constructor.
 * <p>
 * Resource handling: {@link #open} closes the log it has opened if the enumerator can't be constructed,
 * the constructor closes the map it has created if populating it fails, and {@link #close()} always
 * attempts to close both the log and the map.
 */
public class DurableEnumerator<K> implements ScannableDataEnumeratorEx<K>, Flushable, Closeable {
  public static final int DATA_FORMAT_VERSION = 1;

  public static final int PAGE_SIZE = 8 << 20;

  private final AppendOnlyLog keysLog;
  private final @NotNull KeyDescriptorEx<K> keyDescriptor;

  //MAYBE RC: we actually don't need _durable_ map here. We could go with
  //          1) in-memory map, transient & re-populated from log on each start
  //          2) swappable in-memory/on-disk map, there on-disk part is transient and
  //             map is re-populated from log on each start
  //          3) on-disk map, durable between restarts re-populated from log only on
  //             corruption
  private final @NotNull IntToMultiIntMap keyHashToId;

  /**
   * Opens (or creates) the keys log at storagePath and builds an enumerator on top of it.
   *
   * @param storagePath   file backing the append-only log of keys
   * @param keyDescriptor hashing/equality/serialization strategy for keys
   * @param mapFactory    supplies the (hash -> ids) map; the map is populated from the log
   * @throws IOException if the log can't be opened, has an unexpected data format version, or can't be read
   */
  public static <K> DurableEnumerator<K> open(@NotNull Path storagePath,
                                              @NotNull KeyDescriptorEx<K> keyDescriptor,
                                              @NotNull Supplier<IntToMultiIntMap> mapFactory) throws IOException {
    AppendOnlyLog appendOnlyLog = openLog(storagePath);
    try {
      return new DurableEnumerator<>(
        keyDescriptor,
        appendOnlyLog,
        mapFactory
      );
    }
    catch (Throwable t) {
      //the log was opened here, and nobody else owns it yet: don't leak it if construction fails
      closeLogSuppressing(appendOnlyLog, t);
      throw t;
    }
  }

  /**
   * Creates an enumerator over an already opened log. The map supplied by mapFactory is filled with
   * (hash(key) -> id) entries for every record in the log. If filling fails, the map is closed before
   * the failure is propagated; the log is left for the caller to close.
   *
   * @throws IOException if reading records from the log or updating the map fails
   */
  public DurableEnumerator(@NotNull KeyDescriptorEx<K> keyDescriptor,
                           @NotNull AppendOnlyLog appendOnlyLog,
                           @NotNull Supplier<IntToMultiIntMap> mapFactory) throws IOException {
    this.keyDescriptor = keyDescriptor;
    this.keysLog = appendOnlyLog;
    this.keyHashToId = mapFactory.get();

    //MAYBE RC: Could be filled async -- to not delay initialization
    //MAYBE RC: Extract this loading from ctor? I.e. define that map should already be populated, same as keysLog is.
    //          This way sync/async loading would be a property of factory(-ies), not ctor.
    try {
      this.keysLog.forEachRecord((logId, buffer) -> {
        K key = this.keyDescriptor.read(buffer);
        int keyHash = this.keyDescriptor.hashCodeOf(key);
        int id = logIdToEnumeratorId(logId);
        keyHashToId.put(keyHash, id);
        return true;
      });
    }
    catch (Throwable t) {
      //the map was created by this constructor, and is unreachable for the caller if construction fails
      try {
        keyHashToId.close();
      }
      catch (Throwable closeFailure) {
        t.addSuppressed(closeFailure);
      }
      throw t;
    }
  }

  /**
   * Opens the log and validates its data format version: version 0 is treated as 'not yet set' and is
   * replaced by {@link #DATA_FORMAT_VERSION}, any other mismatching version is rejected. The log is closed
   * if validation fails for any reason.
   */
  private static @NotNull AppendOnlyLog openLog(@NotNull Path storagePath) throws IOException {
    AppendOnlyLogOverMMappedFile keysLog = new AppendOnlyLogOverMMappedFile(
      new MMappedFileStorage(storagePath, PAGE_SIZE)
    );
    try {
      int dataFormatVersion = keysLog.getDataVersion();
      if (dataFormatVersion == 0) {//FIXME RC: check log is empty for this branch
        keysLog.setDataVersion(DATA_FORMAT_VERSION);
      }
      else if (dataFormatVersion != DATA_FORMAT_VERSION) {
        throw new VersionUpdatedException(storagePath, DATA_FORMAT_VERSION, dataFormatVersion);
      }
      return keysLog;
    }
    catch (Throwable t) {
      closeLogSuppressing(keysLog, t);
      throw t;
    }
  }

  /** Closes the log; a failure to close is attached to primary as suppressed instead of replacing it. */
  private static void closeLogSuppressing(@NotNull AppendOnlyLog log,
                                          @NotNull Throwable primary) {
    try {
      log.close();
    }
    catch (Throwable closeFailure) {
      primary.addSuppressed(closeFailure);
    }
  }

  @Override
  public void flush() throws IOException {
    keysLog.flush(true);
    keyHashToId.flush();
  }

  @Override
  public void close() throws IOException {
    //try-with-resources guarantees the map is closed even if closing the log fails. The log is still
    // closed first, and a failure to close the map is attached as suppressed to the log's failure.
    try (IntToMultiIntMap mapToClose = keyHashToId) {
      keysLog.close();
    }
  }

  /** @return id of the key, assigning a new id if the key was not enumerated yet; NULL_ID for null key */
  @Override
  public int enumerate(@Nullable K key) throws IOException {
    if (key == null) {
      return NULL_ID;
    }
    return lookupOrCreateIdForKey(key);
  }

  /** @return id of the key as reported by the map lookup, without assigning a new id; NULL_ID for null key */
  @Override
  public int tryEnumerate(@Nullable K key) throws IOException {
    if (key == null) {
      return NULL_ID;
    }
    return lookupIdForKey(key);
  }

  /** @return key read from the log record with id=keyId, or null for NULL_ID */
  @Override
  public @Nullable K valueOf(int keyId) throws IOException {
    if (keyId == NULL_ID) {
      return null;
    }
    return keysLog.read(keyId, keyDescriptor::read);
  }

  /** Feeds every key stored in the log to the processor; returns whatever the log's forEachRecord returns. */
  @Override
  public boolean processAllDataObjects(@NotNull Processor<? super K> processor) throws IOException {
    return keysLog.forEachRecord((recordId, buffer) -> {
      K key = keyDescriptor.read(buffer);
      return processor.process(key);
    });
  }

  /**
   * append-log identifies records by _long_ id, while enumerator API uses _int_ ids -- the method does the
   * conversion
   *
   * @throws ArithmeticException if logRecordId doesn't fit into int
   */
  private static int logIdToEnumeratorId(long logRecordId) {
    return Math.toIntExact(logRecordId);
  }

  /** @return true if the key stored in the log under candidateId is equal (by keyDescriptor) to the key */
  private boolean isSameKey(int candidateId,
                            @NotNull K key) throws IOException {
    K candidateKey = keysLog.read(candidateId, keyDescriptor::read);
    return keyDescriptor.areEqual(candidateKey, key);
  }

  private int lookupIdForKey(@NotNull K key) throws IOException {
    int keyHash = keyDescriptor.hashCodeOf(key);
    return keyHashToId.lookup(keyHash, candidateId -> isSameKey(candidateId, key));
  }

  private int lookupOrCreateIdForKey(@NotNull K key) throws IOException {
    int keyHash = keyDescriptor.hashCodeOf(key);
    return keyHashToId.lookupOrInsert(
      keyHash,
      candidateId -> isSameKey(candidateId, key),
      _keyHash_ -> {
        //no candidate matched: append the key to the log, its record id becomes the key's id
        long logRecordId = keyDescriptor.saveToLog(key, keysLog);
        return logIdToEnumeratorId(logRecordId);
      });
  }
}

View File

@@ -1,5 +1,5 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev;
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.Forceable;
import com.intellij.openapi.util.IntRef;
@@ -17,6 +17,7 @@ import org.jsoup.UncheckedIOException;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -46,7 +47,7 @@ public class DurableStringEnumerator implements ScannableDataEnumeratorEx<String
//fill the in-memory mapping:
//MAYBE RC: could be filled async -- to not delay initialization
this.valuesLog.forEachRecord((logId, buffer) -> {
String value = IOUtil.readString(buffer);
String value = readString(buffer);
int id = Math.toIntExact(logId);
int valueHash = hashOf(value);
valueHashToId.put(valueHash, id);
@@ -138,7 +139,7 @@ public class DurableStringEnumerator implements ScannableDataEnumeratorEx<String
@Override
public boolean processAllDataObjects(@NotNull Processor<? super String> processor) throws IOException {
return valuesLog.forEachRecord((recordId, buffer) -> {
String value = IOUtil.readString(buffer);
String value = readString(buffer);
return processor.process(value);
});
}
@@ -154,7 +155,7 @@ public class DurableStringEnumerator implements ScannableDataEnumeratorEx<String
IntRef foundIdRef = new IntRef(NULL_ID);
valueHashToId.lookup(hash, candidateId -> {
try {
String candidateValue = valuesLog.read(candidateId, IOUtil::readString);
String candidateValue = valuesLog.read(candidateId, DurableStringEnumerator::readString);
if (candidateValue.equals(value)) {
foundIdRef.set(candidateId);
return false;//stop
@@ -167,4 +168,11 @@ public class DurableStringEnumerator implements ScannableDataEnumeratorEx<String
});
return foundIdRef.get();
}
/** Single place where strings are decoded from a log record: delegates to {@code IOUtil.readString(buffer)}. */
private static @NotNull String readString(@NotNull ByteBuffer buffer) {
  //MAYBE RC: instead of converting string bytes to/from UTF8 -- maybe just store String fields as-is?
  //          i.e. access private .value and .coder fields, and write/read their values? -- this allows
  //          to bypass 1 array copy, and probably also a character encoding/decoding
  String decoded = IOUtil.readString(buffer);
  return decoded;
}
}

View File

@@ -0,0 +1,64 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.util.io.DataEnumeratorEx;
import org.jetbrains.annotations.NotNull;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
/**
 * Multimap from an int key to any number of int values: Map[int -> int*].
 * It is a building block for {@link DurableEnumerator}, hence the rather specialized API.
 * <p>
 * Threading: implementations must be at least thread-safe. In particular,
 * {@link #lookupOrInsert(int, ValueAcceptor, ValueCreator)} is expected to be atomic, i.e.
 * {@link ValueCreator#newValueForKey(int)} is invoked only once for a key. The level of concurrency is up
 * to the implementation -- guarding everything by a single lock is OK.
 * <p>
 * Durability is optional: the map is {@link Closeable} and {@link Flushable}, but it is not _required_
 * to be durable -- no-op flush/close methods are OK.
 */
public interface IntToMultiIntMap extends Flushable, Closeable {
  /** 'No value' marker: returned by {@link #lookup(int, ValueAcceptor)} if nothing was found/accepted. */
  int NULL_ID = DataEnumeratorEx.NULL_ID;

  /** Tests a candidate value found for a key. */
  @FunctionalInterface
  interface ValueAcceptor {
    boolean accept(int value) throws IOException;
  }

  /** Produces a new value for a key for which no existing value was accepted. */
  @FunctionalInterface
  interface ValueCreator {
    /** Method should never return {@link #NULL_ID} */
    int newValueForKey(int key) throws IOException;
  }

  /** Adds the (key, value) pair to the map. */
  void put(int key, int value) throws IOException;

  /**
   * Looks up the values stored for the key, and offers them to valuesAcceptor.
   *
   * @return the first value for the key which was accepted by valuesAcceptor -- or {@link #NULL_ID} if no
   * values were found for the key, or none of the values found were accepted
   */
  int lookup(int key, @NotNull ValueAcceptor valuesAcceptor) throws IOException;

  /**
   * Same as {@link #lookup(int, ValueAcceptor)}, but if no values were found for the key, or none were
   * accepted by valuesAcceptor -- {@link ValueCreator#newValueForKey(int)} is called, and the value it
   * returns is inserted into the map, and returned.
   *
   * @return the value for the key which was accepted by valuesAcceptor, or the newly created (and
   * inserted) value if there is none. Method should never return {@link #NULL_ID}
   */
  int lookupOrInsert(int key, @NotNull ValueAcceptor valuesAcceptor, @NotNull ValueCreator valueCreator) throws IOException;
}

View File

@@ -0,0 +1,43 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.appendonlylog.AppendOnlyLog;
import com.intellij.util.io.KeyDescriptor;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.nio.ByteBuffer;
/**
 * Full analog of {@link KeyDescriptor}, but with {@link ByteBuffer} instead of {@link java.io.InputStream} and
 * {@link java.io.OutputStream}
 */
public interface KeyDescriptorEx<K> {
  // ---- identity ----

  int hashCodeOf(K value);

  boolean areEqual(K key1, K key2);

  // ---- (de)serialization ----

  /** Reads a key from its binary representation in the input buffer. */
  K read(@NotNull ByteBuffer input) throws IOException;

  /** @return number of bytes {@link #save(ByteBuffer, Object)} needs to write the key */
  int sizeOfSerialized(K key) throws IOException;

  /** Writes the binary representation of the key into the output buffer. */
  void save(@NotNull ByteBuffer output, K key) throws IOException;

  /**
   * Appends the key to the log as a record of {@link #sizeOfSerialized(Object)} bytes, filled by
   * {@link #save(ByteBuffer, Object)}.
   *
   * @return whatever {@code log.append(...)} returns for the appended record
   */
  //TODO RC: this is quite a troubling API choice: we need to know the size of the key's binary
  //         representation to allocate room for the record in append-log. But for many
  //         types K the only way to know the size is to actually serialize the object
  //         -- hence the API basically forces to do it twice: first time to assess the size,
  //         second time to actually write the object into ByteBuffer. This is wasteful.
  default long saveToLog(K key, @NotNull AppendOnlyLog log) throws IOException {
    return log.append(
      target -> {
        save(target, key);
        return target;
      },
      sizeOfSerialized(key)
    );
  }
}

View File

@@ -0,0 +1,90 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.util.IntRef;
import com.intellij.openapi.vfs.newvfs.persistent.dev.InvertedFilenameHashBasedIndex.Int2IntMultimap;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
import java.io.UncheckedIOException;
/**
 * Implements {@link IntToMultiIntMap} on top of {@link Int2IntMultimap}
 * Thread-safe but not concurrent -- all operations are just guarded by 'this' lock.
 * Not persistent: {@link #flush()} and {@link #close()} do nothing.
 * <p>
 * An {@link IOException} thrown by a {@link ValueAcceptor} is propagated to the caller as-is: it is wrapped
 * into {@link UncheckedIOException} only to get through the multimap's callback, and is unwrapped right
 * after.
 */
public class NonParallelNonPersistentIntToMultiIntMap implements IntToMultiIntMap {

  private final Int2IntMultimap multimap = new Int2IntMultimap();

  @Override
  public synchronized void put(int key,
                               int value) throws IOException {
    multimap.put(adjustKey(key), value);
  }

  @Override
  public synchronized int lookup(int key,
                                 @NotNull ValueAcceptor valuesAcceptor) throws IOException {
    return firstAcceptedValue(adjustKey(key), valuesAcceptor);
  }

  @Override
  public synchronized int lookupOrInsert(int key,
                                         @NotNull ValueAcceptor valuesAcceptor,
                                         @NotNull ValueCreator valueCreator) throws IOException {
    int adjustedKey = adjustKey(key);
    int existingValue = firstAcceptedValue(adjustedKey, valuesAcceptor);
    if (existingValue != NULL_ID) {
      return existingValue;
    }

    //the creator gets the original key, while the multimap is always addressed by the adjusted one
    int newValue = valueCreator.newValueForKey(key);
    multimap.put(adjustedKey, newValue);
    return newValue;
  }

  @Override
  public synchronized void flush() throws IOException {
    //nothing
  }

  @Override
  public synchronized void close() throws IOException {
    //nothing
  }

  /**
   * Scans values stored under adjustedKey, and stops on the first one accepted by valuesAcceptor.
   * Must be called under 'this' lock.
   *
   * @return the first accepted value, or {@link #NULL_ID} if there is none
   * @throws IOException if valuesAcceptor throws it
   */
  private int firstAcceptedValue(int adjustedKey,
                                 @NotNull ValueAcceptor valuesAcceptor) throws IOException {
    IntRef acceptedValue = new IntRef(NULL_ID);
    try {
      multimap.lookup(adjustedKey, value -> {
        try {
          if (valuesAcceptor.accept(value)) {
            acceptedValue.set(value);
            return false;//stop
          }
          return true;
        }
        catch (IOException e) {
          //tunnel the checked exception through the multimap's callback
          throw new UncheckedIOException(e);
        }
      });
    }
    catch (UncheckedIOException e) {
      //restore the checked exception, as the method signature promises
      throw e.getCause();
    }
    return acceptedValue.get();
  }

  private static int adjustKey(int key) {
    if (key == Int2IntMultimap.NO_VALUE) {
      //Int2IntMultimap doesn't allow 0 keys/values, hence replace 0 key with just any value!=0. Key doesn't
      // identify value uniquely anyway, hence this replacement just adds another collision -- basically,
      // we replaced original Key.hash with our own hash, which avoids 0 at the cost of slightly higher collision
      // chances
      return -1;// any value!=0 will do
    }
    return key;
  }
}

View File

@@ -1,5 +1,5 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev;
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.Forceable;
import com.intellij.openapi.util.ThrowableComputable;

View File

@@ -0,0 +1,81 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.StringEnumeratorTestBase;
import com.intellij.openapi.vfs.newvfs.persistent.dev.appendonlylog.AppendOnlyLog;
import com.intellij.util.io.IOUtil;
import org.jetbrains.annotations.NotNull;
import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import static com.intellij.util.io.DataEnumeratorEx.NULL_ID;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
/**
 * Runs the common string-enumerator test suite against {@link DurableEnumerator} backed by an in-memory
 * {@link NonParallelNonPersistentIntToMultiIntMap}, plus a few checks of null/NULL_ID handling.
 */
public class DurableEnumeratorOfStringsTest extends StringEnumeratorTestBase<DurableEnumerator<String>> {

  @Test
  public void nullValue_EnumeratedTo_NULL_ID() throws IOException {
    int idOfNull = enumerator.enumerate(null);
    assertEquals("null value enumerated to NULL_ID", NULL_ID, idOfNull);
  }

  @Test
  public void valueOf_NULL_ID_IsNull() throws IOException {
    String valueOfNullId = enumerator.valueOf(NULL_ID);
    assertNull("valueOf(NULL_ID(=0)) must be null", valueOfNullId);
  }

  @Override
  protected DurableEnumerator<String> openEnumerator(@NotNull Path storagePath) throws IOException {
    KeyDescriptorEx<String> descriptor = new Utf8StringDescriptor();
    return DurableEnumerator.open(storagePath, descriptor, NonParallelNonPersistentIntToMultiIntMap::new);
  }

  /**
   * Descriptor for String keys: appends the key's UTF-8 bytes to the log directly, so the size/save pair
   * used by the default saveToLog() is intentionally left unimplemented.
   */
  private static final class Utf8StringDescriptor implements KeyDescriptorEx<String> {
    @Override
    public int hashCodeOf(String value) {
      return value.hashCode();
    }

    @Override
    public boolean areEqual(String key1, String key2) {
      return key1.equals(key2);
    }

    @Override
    public String read(@NotNull ByteBuffer input) throws IOException {
      return IOUtil.readString(input);
    }

    @Override
    public long saveToLog(String key, @NotNull AppendOnlyLog log) throws IOException {
      byte[] utf8Bytes = key.getBytes(UTF_8);
      return log.append(utf8Bytes);
    }

    @Override
    public int sizeOfSerialized(String key) throws IOException {
      throw new UnsupportedOperationException("Method not implemented");
    }

    @Override
    public void save(@NotNull ByteBuffer output, String key) throws IOException {
      throw new UnsupportedOperationException("Method not implemented");
    }
  }
}

View File

@@ -1,6 +1,7 @@
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev;
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.StringEnumeratorTestBase;
import org.jetbrains.annotations.NotNull;
import org.junit.Test;

View File

@@ -1,6 +1,7 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev;
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs.newvfs.persistent.dev.enumerator;
import com.intellij.openapi.vfs.newvfs.persistent.dev.StringEnumeratorTestBase;
import com.intellij.util.io.IOUtil;
import com.intellij.util.io.ResizeableMappedFile;
import org.jetbrains.annotations.NotNull;