Extract ContentHashEnumerator. Hide hash evaluation in DigestUtil.

GitOrigin-RevId: b57fee1e15bc4c87019318ec4f5cd3eb93b23f62
This commit is contained in:
Sergey Patrikeev
2019-12-23 17:25:44 +03:00
committed by intellij-monorepo-bot
parent 6ea628e684
commit d29c5309e1
9 changed files with 172 additions and 152 deletions

View File

@@ -23,7 +23,6 @@ import com.intellij.openapi.util.text.StringUtil;
import com.intellij.openapi.vfs.VfsUtilCore;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.VirtualFileVisitor;
import com.intellij.openapi.vfs.newvfs.persistent.ContentHashesUtil;
import com.intellij.psi.SingleRootFileViewProvider;
import com.intellij.psi.impl.JavaSimplePropertyIndex;
import com.intellij.psi.impl.cache.impl.id.IdIndex;
@@ -34,6 +33,7 @@ import com.intellij.psi.impl.java.stubs.index.JavaAutoModuleNameIndex;
import com.intellij.psi.impl.search.JavaNullMethodArgumentIndex;
import com.intellij.psi.stubs.StubUpdatingIndex;
import com.intellij.util.containers.ContainerUtil;
import com.intellij.util.hash.ContentHashEnumerator;
import com.intellij.util.indexing.FileBasedIndexExtension;
import com.intellij.util.indexing.ID;
import com.intellij.util.indexing.IndexableSetContributor;
@@ -213,7 +213,7 @@ public class DumpIndexAction extends AnAction {
@NotNull Collection<HashBasedIndexGenerator<?, ?>> generators,
@NotNull Path chunkOut) {
try {
ContentHashesUtil.HashEnumerator hashEnumerator = new ContentHashesUtil.HashEnumerator(chunkOut.resolve("hashes"));
ContentHashEnumerator hashEnumerator = new ContentHashEnumerator(chunkOut.resolve("hashes"));
try {
for (HashBasedIndexGenerator<?, ?> generator : generators) {
generator.openIndex();

View File

@@ -0,0 +1,94 @@
// Copyright 2000-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.intellij.util.hash;
import com.intellij.util.io.DifferentSerializableBytesImplyNonEqualityPolicy;
import com.intellij.util.io.KeyDescriptor;
import com.intellij.util.io.PagedFileStorage;
import com.intellij.util.io.PersistentBTreeEnumerator;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
// Persistent enumerator specialized for fixed-length content hashes (SHA-1, 20 bytes).
// Because every key occupies exactly SIGNATURE_LENGTH bytes in the data file, the
// byte-offset-derived ids of the base PersistentBTreeEnumerator can be compacted
// into dense sequential ids by dividing by the record length; the overrides below
// perform that translation in both directions.
@ApiStatus.Internal
public final class ContentHashEnumerator extends PersistentBTreeEnumerator<byte[]> {
// Byte length of every stored hash; matches the SHA-1 digest size.
private static final int SIGNATURE_LENGTH = 20;
/** Opens (or creates) the enumerator backed by the given file, with no explicit storage lock context. */
public ContentHashEnumerator(@NotNull Path contentsHashesFile) throws IOException {
this(contentsHashesFile, null);
}
/** Opens (or creates) the enumerator with a fixed-length hash descriptor and a 64K initial size. */
public ContentHashEnumerator(@NotNull Path contentsHashesFile,
@Nullable PagedFileStorage.StorageLockContext storageLockContext) throws IOException {
this(contentsHashesFile, new ContentHashesDescriptor(), 64 * 1024, storageLockContext);
}
private ContentHashEnumerator(@NotNull Path file,
@NotNull KeyDescriptor<byte[]> dataDescriptor,
int initialSize,
@Nullable PagedFileStorage.StorageLockContext lockContext) throws IOException {
super(file, dataDescriptor, initialSize, lockContext);
// NOTE(review): presumably the marker interface makes the base class treat any byte
// difference as a distinct key, which the id compaction relies on — confirm against
// the DifferentSerializableBytesImplyNonEqualityPolicy contract.
LOG.assertTrue(dataDescriptor instanceof DifferentSerializableBytesImplyNonEqualityPolicy);
}
// Rejects values that are not exactly one signature long before enumerating;
// the fixed record length is what makes the id arithmetic below valid.
@Override
public int enumerate(byte[] value) throws IOException {
LOG.assertTrue(SIGNATURE_LENGTH == value.length);
return super.enumerate(value);
}
// The base class returns a value proportional to the byte position of the written
// record; dividing by the fixed record length yields a compact sequential id.
@Override
protected int doWriteData(byte[] value) throws IOException {
return super.doWriteData(value) / SIGNATURE_LENGTH;
}
// Scales the base class's largest id down to the compact id space.
@Override
public int getLargestId() {
return super.getLargestId() / SIGNATURE_LENGTH;
}
// Translates a compact id back into the base class's address-based index before delegating.
@Override
protected boolean isKeyAtIndex(byte[] value, int idx) throws IOException {
return super.isKeyAtIndex(value, addrToIndex(indexToAddr(idx) * SIGNATURE_LENGTH));
}
// Translates a compact id back into the base class's address-based index before delegating.
@Override
public byte[] valueOf(int idx) throws IOException {
return super.valueOf(addrToIndex(indexToAddr(idx) * SIGNATURE_LENGTH));
}
// Serializes hashes as raw fixed-length byte blocks (no length prefix needed).
private static class ContentHashesDescriptor implements KeyDescriptor<byte[]>, DifferentSerializableBytesImplyNonEqualityPolicy {
@Override
public void save(@NotNull DataOutput out, byte[] value) throws IOException {
out.write(value);
}
@Override
public byte[] read(@NotNull DataInput in) throws IOException {
byte[] b = new byte[SIGNATURE_LENGTH];
in.readFully(b);
return b;
}
@Override
public int getHashCode(byte[] value) {
int hash = 0; // first 4 bytes of the digest serve as the hash code; digest bytes are effectively uniformly distributed
for (int i = 0; i < 4; ++i) {
hash = (hash << 8) + (value[i] & 0xFF);
}
return hash;
}
@Override
public boolean isEqual(byte[] val1, byte[] val2) {
return Arrays.equals(val1, val2);
}
}
}

View File

@@ -6,8 +6,8 @@ import com.intellij.openapi.diagnostic.Logger;
import com.intellij.openapi.util.Disposer;
import com.intellij.openapi.util.Key;
import com.intellij.openapi.util.ShutDownTracker;
import com.intellij.openapi.vfs.newvfs.persistent.ContentHashesUtil;
import com.intellij.util.containers.ContainerUtil;
import com.intellij.util.hash.ContentHashEnumerator;
import com.intellij.util.indexing.*;
import com.intellij.util.indexing.impl.IndexStorage;
import com.intellij.util.indexing.snapshot.IndexedHashesSupport;
@@ -29,7 +29,7 @@ public class FileContentHashIndexExtension extends FileBasedIndexExtension<Long,
public static final ID<Long, Void> HASH_INDEX_ID = ID.create("file.content.hash.index");
@NotNull
private final ContentHashesUtil.HashEnumerator[] myEnumerators;
private final ContentHashEnumerator[] myEnumerators;
@NotNull
public static FileContentHashIndexExtension create(@NotNull Path[] enumeratorDirs, @NotNull Disposable parent) throws IOException {
@@ -41,9 +41,9 @@ public class FileContentHashIndexExtension extends FileBasedIndexExtension<Long,
private FileContentHashIndexExtension(@NotNull Path[] enumeratorDirs) throws IOException {
IOException[] exception = {null};
myEnumerators = ContainerUtil.map2Array(enumeratorDirs, ContentHashesUtil.HashEnumerator.class, d -> {
myEnumerators = ContainerUtil.map2Array(enumeratorDirs, ContentHashEnumerator.class, d -> {
try {
return new ContentHashesUtil.HashEnumerator(d.getParent().resolve("hashes"));
return new ContentHashEnumerator(d.getParent().resolve("hashes"));
}
catch (IOException e) {
exception[0] = e;
@@ -93,7 +93,7 @@ public class FileContentHashIndexExtension extends FileBasedIndexExtension<Long,
private Long tryEnumerate(byte[] hash) throws IOException {
for (int i = 0; i < myEnumerators.length; i++) {
ContentHashesUtil.HashEnumerator enumerator = myEnumerators[i];
ContentHashEnumerator enumerator = myEnumerators[i];
//noinspection SynchronizationOnLocalVariableOrMethodParameter
synchronized (enumerator) {
int id = Math.abs(enumerator.tryEnumerate(hash));
@@ -166,7 +166,7 @@ public class FileContentHashIndexExtension extends FileBasedIndexExtension<Long,
}
private void closeEnumerator() {
for (ContentHashesUtil.HashEnumerator enumerator : myEnumerators) {
for (ContentHashEnumerator enumerator : myEnumerators) {
synchronized (enumerator) {
if (enumerator.isClosed()) return;
try {

View File

@@ -5,7 +5,7 @@ import com.intellij.openapi.fileTypes.FileType;
import com.intellij.openapi.project.Project;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.newvfs.persistent.ContentHashesUtil;
import com.intellij.util.hash.ContentHashEnumerator;
import com.intellij.util.indexing.*;
import com.intellij.util.indexing.impl.IndexStorage;
import com.intellij.util.indexing.impl.InputData;
@@ -66,10 +66,10 @@ public class HashBasedIndexGenerator<K, V> {
public void openIndex() throws IOException {
IndexStorage<K, V> indexStorage = new MapIndexStorage<K, V>(myStorageFile,
myExtension.getKeyDescriptor(),
myExtension.getValueExternalizer(),
myExtension.getCacheSize(),
myExtension.keyIsUniqueForIndexedFile()) {
myExtension.getKeyDescriptor(),
myExtension.getValueExternalizer(),
myExtension.getCacheSize(),
myExtension.keyIsUniqueForIndexedFile()) {
@Override
protected void checkCanceled() {
//ignore
@@ -132,7 +132,7 @@ public class HashBasedIndexGenerator<K, V> {
}
}
public void indexFile(@NotNull VirtualFile f, @NotNull Project project, @NotNull ContentHashesUtil.HashEnumerator hashEnumerator) {
public void indexFile(@NotNull VirtualFile f, @NotNull Project project, @NotNull ContentHashEnumerator hashEnumerator) {
if (!myInputFilter.acceptInput(f)) {
return;
}

View File

@@ -6,11 +6,17 @@ import com.intellij.openapi.editor.Document;
import com.intellij.openapi.fileEditor.FileDocumentManager;
import com.intellij.openapi.fileTypes.FileType;
import com.intellij.openapi.util.ShutDownTracker;
import com.intellij.openapi.vfs.newvfs.persistent.*;
import com.intellij.openapi.vfs.newvfs.persistent.FlushingDaemon;
import com.intellij.openapi.vfs.newvfs.persistent.PersistentFS;
import com.intellij.openapi.vfs.newvfs.persistent.PersistentFSImpl;
import com.intellij.psi.PsiDocumentManager;
import com.intellij.psi.PsiFile;
import com.intellij.util.ArrayUtilRt;
import com.intellij.util.hash.ContentHashEnumerator;
import com.intellij.util.indexing.FileContent;
import com.intellij.util.indexing.FileContentImpl;
import com.intellij.util.indexing.IndexInfrastructure;
import com.intellij.util.io.DigestUtil;
import com.intellij.util.io.IOUtil;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
@@ -18,15 +24,19 @@ import org.jetbrains.annotations.Nullable;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
/**
* @author Maxim.Mossienko
*/
public class IndexedHashesSupport {
private static final Logger LOG = Logger.getInstance("#" + IndexedHashesSupport.class.getPackage().getName());
private static volatile ContentHashesUtil.HashEnumerator ourHashesWithFileType;
private static final MessageDigest CONTENT_HASH_WITH_FILE_TYPE_DIGEST = DigestUtil.sha1();
private static volatile ContentHashEnumerator ourHashesWithFileType;
public static void initContentHashesEnumerator() throws IOException {
if (ourHashesWithFileType != null) return;
@@ -35,7 +45,7 @@ public class IndexedHashesSupport {
if (ourHashesWithFileType != null) return;
final File hashEnumeratorFile = new File(IndexInfrastructure.getPersistentIndexRoot(), "hashesWithFileType");
try {
ContentHashesUtil.HashEnumerator hashEnumerator = new ContentHashesUtil.HashEnumerator(hashEnumeratorFile.toPath());
ContentHashEnumerator hashEnumerator = new ContentHashEnumerator(hashEnumeratorFile.toPath());
FlushingDaemon.everyFiveSeconds(IndexedHashesSupport::flushContentHashes);
ShutDownTracker.getInstance().registerShutdownTask(IndexedHashesSupport::flushContentHashes);
ourHashesWithFileType = hashEnumerator;
@@ -83,16 +93,15 @@ public class IndexedHashesSupport {
}
if (contentHash == null) {
byte[] bytes = content.getContent();
contentHash = calculateFileContentHash(bytes);
contentHash = calculateContentHash(content);
// todo store content hash in FS
}
return mergeIndexedHash(contentHash, binary ? null : content.getCharset(), content.getFileType());
}
private static byte[] calculateFileContentHash(byte[] bytes) {
return ContentHashesUtil.calculateContentHash(bytes, 0, bytes.length);
private static byte[] calculateContentHash(@NotNull FileContent content) {
return DigestUtil.calculateContentHash(CONTENT_HASH_WITH_FILE_TYPE_DIGEST, content.getContent());
}
@Nullable
@@ -107,7 +116,7 @@ public class IndexedHashesSupport {
if (file != null) {
Charset charset = content.getCharset();
FileType fileType = content.getFileType();
return mergeIndexedHash(calculateFileContentHash(file.getText().getBytes(charset)), charset, fileType);
return mergeIndexedHash(DigestUtil.calculateContentHash(CONTENT_HASH_WITH_FILE_TYPE_DIGEST, file.getText().getBytes(charset)), charset, fileType);
}
}
}
@@ -118,18 +127,10 @@ public class IndexedHashesSupport {
private static byte[] mergeIndexedHash(@NotNull byte[] binaryContentHash,
@Nullable Charset charsetOrNullForBinary,
@NotNull FileType fileType) {
MessageDigest messageDigest = ContentHashesUtil.HASHER_CACHE.getValue();
LOG.assertTrue(binaryContentHash.length == ContentHashesUtil.SIGNATURE_LENGTH,
"binaryContentHash should be a pre-calculated sha1 signature");
messageDigest.reset();
messageDigest.update(binaryContentHash);
messageDigest.update(fileType.getName().getBytes(ContentHashesUtil.HASHER_CHARSET));
if (charsetOrNullForBinary != null) {
messageDigest.update(charsetOrNullForBinary.name().getBytes(ContentHashesUtil.HASHER_CHARSET));
}
return messageDigest.digest();
byte[] fileTypeBytes = fileType.getName().getBytes(StandardCharsets.UTF_8);
byte[] charsetBytes = charsetOrNullForBinary != null
? charsetOrNullForBinary.name().getBytes(StandardCharsets.UTF_8)
: ArrayUtilRt.EMPTY_BYTE_ARRAY;
return DigestUtil.calculateMergedHash(CONTENT_HASH_WITH_FILE_TYPE_DIGEST, new byte[][]{binaryContentHash, fileTypeBytes, charsetBytes});
}
}

View File

@@ -13,8 +13,8 @@ import com.intellij.util.indexing.impl.DebugAssertions;
import com.intellij.util.indexing.impl.InputData;
import com.intellij.util.indexing.impl.forward.AbstractForwardIndexAccessor;
import com.intellij.util.indexing.impl.forward.PersistentMapBasedForwardIndex;
import com.intellij.util.io.*;
import com.intellij.util.io.DataOutputStream;
import com.intellij.util.io.*;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

View File

@@ -1,110 +0,0 @@
// Copyright 2000-2019 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.intellij.openapi.vfs.newvfs.persistent;
import com.intellij.openapi.util.ThreadLocalCachedValue;
import com.intellij.util.io.*;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.util.Arrays;
// Utilities for computing and enumerating content hashes (SHA-1 based).
// This class was split by this commit: the enumerator moved to
// com.intellij.util.hash.ContentHashEnumerator and the hash computation moved
// to DigestUtil.
public class ContentHashesUtil {
// Per-thread cached SHA-1 digest; init() resets it before each reuse so stale
// update() state from a previous computation cannot leak into the next one.
public static final ThreadLocalCachedValue<MessageDigest> HASHER_CACHE = new ThreadLocalCachedValue<MessageDigest>() {
@NotNull
@Override
public MessageDigest create() {
return DigestUtil.sha1();
}
@Override
protected void init(@NotNull MessageDigest value) {
value.reset();
}
};
// Byte length of every stored hash; matches the SHA-1 digest size.
public static final int SIGNATURE_LENGTH = 20;
public static final Charset HASHER_CHARSET = StandardCharsets.UTF_8;
/**
 * Hashes the given byte range as SHA-1 over "<decimal length>\0<bytes>".
 * The length prefix and NUL separator disambiguate inputs of different sizes.
 */
public static byte[] calculateContentHash(byte[] bytes, int offset, int length) {
MessageDigest digest = HASHER_CACHE.getValue();
digest.reset();
digest.update(String.valueOf(length).getBytes(HASHER_CHARSET));
digest.update("\0".getBytes(HASHER_CHARSET));
digest.update(bytes, offset, length);
return digest.digest();
}
// Persistent enumerator for fixed-length hashes. Every record occupies exactly
// SIGNATURE_LENGTH bytes, so byte-offset-derived ids of the base
// PersistentBTreeEnumerator can be compacted by dividing by the record length.
public static class HashEnumerator extends PersistentBTreeEnumerator<byte[]> {
/** Opens (or creates) the enumerator with no explicit storage lock context. */
public HashEnumerator(@NotNull Path contentsHashesFile) throws IOException {
this(contentsHashesFile, null);
}
/** Opens (or creates) the enumerator with a fixed-length hash descriptor and a 64K initial size. */
public HashEnumerator(@NotNull Path contentsHashesFile, @Nullable PagedFileStorage.StorageLockContext storageLockContext) throws IOException {
this(contentsHashesFile, new ContentHashesDescriptor(), 64 * 1024, storageLockContext);
}
private HashEnumerator(@NotNull Path file,
@NotNull KeyDescriptor<byte[]> dataDescriptor,
int initialSize,
@Nullable PagedFileStorage.StorageLockContext lockContext) throws IOException {
super(file, dataDescriptor, initialSize, lockContext);
// NOTE(review): presumably the marker interface makes the base class treat any
// byte difference as a distinct key, which the id compaction relies on — confirm.
LOG.assertTrue(dataDescriptor instanceof DifferentSerializableBytesImplyNonEqualityPolicy);
}
// The base class returns a value proportional to the byte position of the written
// record; dividing by the fixed record length yields a compact sequential id.
@Override
protected int doWriteData(byte[] value) throws IOException {
return super.doWriteData(value) / SIGNATURE_LENGTH;
}
// Scales the base class's largest id down to the compact id space.
@Override
public int getLargestId() {
return super.getLargestId() / SIGNATURE_LENGTH;
}
// Translates a compact id back into the base class's address-based index before delegating.
@Override
protected boolean isKeyAtIndex(byte[] value, int idx) throws IOException {
return super.isKeyAtIndex(value, addrToIndex(indexToAddr(idx) * SIGNATURE_LENGTH));
}
// Translates a compact id back into the base class's address-based index before delegating.
@Override
public byte[] valueOf(int idx) throws IOException {
return super.valueOf(addrToIndex(indexToAddr(idx) * SIGNATURE_LENGTH));
}
}
// Serializes hashes as raw fixed-length byte blocks (no length prefix needed).
private static class ContentHashesDescriptor implements KeyDescriptor<byte[]>, DifferentSerializableBytesImplyNonEqualityPolicy {
@Override
public void save(@NotNull DataOutput out, byte[] value) throws IOException {
out.write(value);
}
@Override
public byte[] read(@NotNull DataInput in) throws IOException {
byte[] b = new byte[SIGNATURE_LENGTH];
in.readFully(b);
return b;
}
@Override
public int getHashCode(byte[] value) {
int hash = 0; // first 4 bytes of the digest serve as the hash code; digest bytes are effectively uniformly distributed
for (int i = 0; i < 4; ++i) {
hash = (hash << 8) + (value[i] & 0xFF);
}
return hash;
}
@Override
public boolean isEqual(byte[] val1, byte[] val2) {
return Arrays.equals(val1, val2);
}
}
}

View File

@@ -21,6 +21,7 @@ import com.intellij.util.*;
import com.intellij.util.concurrency.SequentialTaskExecutor;
import com.intellij.util.containers.ConcurrentIntObjectMap;
import com.intellij.util.containers.IntArrayList;
import com.intellij.util.hash.ContentHashEnumerator;
import com.intellij.util.io.DataOutputStream;
import com.intellij.util.io.*;
import com.intellij.util.io.storage.*;
@@ -158,7 +159,7 @@ public class FSRecords {
private static Storage myAttributes;
private static RefCountingStorage myContents;
private static ResizeableMappedFile myRecords;
private static PersistentBTreeEnumerator<byte[]> myContentHashesEnumerator;
private static ContentHashEnumerator myContentHashesEnumerator;
private static File myRootsFile;
private static final VfsDependentEnum<String> myAttributesList = new VfsDependentEnum<>("attrib", EnumeratorStringDescriptor.INSTANCE, 1);
private static final TIntArrayList myFreeRecords = new TIntArrayList();
@@ -276,7 +277,7 @@ public class FSRecords {
};
// sources usually zipped with 4x ratio
myContentHashesEnumerator = WE_HAVE_CONTENT_HASHES ? new ContentHashesUtil.HashEnumerator(contentsHashesFile.toPath(), storageLockContext) : null;
myContentHashesEnumerator = WE_HAVE_CONTENT_HASHES ? new ContentHashEnumerator(contentsHashesFile.toPath()) : null;
boolean aligned = PagedFileStorage.BUFFER_SIZE % RECORD_SIZE == 0;
if (!aligned) LOG.error("Buffer size " + PagedFileStorage.BUFFER_SIZE + " is not aligned for record size " + RECORD_SIZE);
@@ -557,7 +558,7 @@ public class FSRecords {
return records;
}
private static PersistentBTreeEnumerator<byte[]> getContentHashesEnumerator() {
private static ContentHashEnumerator getContentHashesEnumerator() {
return DbConnection.myContentHashesEnumerator;
}
@@ -1603,11 +1604,13 @@ public class FSRecords {
private static int contents;
private static int reuses;
private static final MessageDigest CONTENT_HASH_DIGEST = DigestUtil.sha1();
private static int findOrCreateContentRecord(byte[] bytes, int offset, int length) throws IOException {
assert WE_HAVE_CONTENT_HASHES;
long started = DUMP_STATISTICS ? System.nanoTime():0;
byte[] digest = ContentHashesUtil.calculateContentHash(bytes, offset, length);
byte[] contentHash = DigestUtil.calculateContentHash(CONTENT_HASH_DIGEST, bytes, offset, length);
long done = DUMP_STATISTICS ? System.nanoTime() - started : 0;
time += done;
@@ -1617,9 +1620,10 @@ public class FSRecords {
if (DUMP_STATISTICS && (contents & 0x3FFF) == 0) {
LOG.info("Contents:" + contents + " of " + totalContents + ", reuses:" + reuses + " of " + totalReuses + " for " + time / 1000000);
}
PersistentBTreeEnumerator<byte[]> hashesEnumerator = getContentHashesEnumerator();
ContentHashEnumerator hashesEnumerator = getContentHashesEnumerator();
final int largestId = hashesEnumerator.getLargestId();
int page = hashesEnumerator.enumerate(digest);
int page = hashesEnumerator.enumerate(contentHash);
if (page <= largestId) {
++reuses;

View File

@@ -20,6 +20,28 @@ object DigestUtil {
/** Returns a fresh SHA-256 [MessageDigest] obtained from the bundled Sun security provider. */
@JvmStatic
fun sha256() = getMessageDigest("SHA-256")
/** Hashes the whole [bytes] array; see the offset/length overload for the exact scheme. */
@JvmStatic
fun calculateContentHash(digest: MessageDigest, bytes: ByteArray): ByteArray =
calculateContentHash(digest, bytes, 0, bytes.size)
/**
 * Hashes the given byte range as "<decimal length>\u0000<bytes>" using a reset clone
 * of [digest]; the length prefix and NUL separator disambiguate inputs of different sizes.
 * Cloning means the shared [digest] instance itself is never mutated, so callers may
 * pass a statically cached digest.
 */
@JvmStatic
fun calculateContentHash(digest: MessageDigest, bytes: ByteArray, offset: Int, length: Int): ByteArray {
val cloned = cloneDigest(digest)
cloned.update(length.toString().toByteArray())
cloned.update("\u0000".toByteArray())
cloned.update(bytes, offset, length)
return cloned.digest()
}
/**
 * Digests the concatenation of [hashArrays] (in order) using a reset clone of [digest].
 * Note: parts are fed raw with no separators, so the result depends only on the
 * concatenated bytes, not on how they are split across the arrays.
 */
@JvmStatic
fun calculateMergedHash(digest: MessageDigest, hashArrays: Array<ByteArray>): ByteArray {
val cloned = cloneDigest(digest)
for (bytes in hashArrays) {
cloned.update(bytes)
}
return cloned.digest()
}
// http://stackoverflow.com/a/41156 - shorter than UUID, but secure
fun randomToken(): String {
return BigInteger(130, random).toString(32)
@@ -28,4 +50,13 @@ object DigestUtil {
// Resolves the digest from the explicit Sun provider rather than the provider
// search order, so the implementation used is deterministic.
private fun getMessageDigest(algorithm: String): MessageDigest {
return MessageDigest.getInstance(algorithm, sunSecurityProvider)
}
// Returns a reset clone of [digest], leaving the original untouched — this is what
// allows a single statically shared MessageDigest to back many hash computations.
// Throws IllegalArgumentException if the provider's digest does not support clone().
private fun cloneDigest(digest: MessageDigest): MessageDigest = try {
val clone = digest.clone() as MessageDigest
clone.reset()
clone
}
catch (e: CloneNotSupportedException) {
throw IllegalArgumentException("Message digest is not cloneable: $digest")
}
}