[vfs] PY-77389: limit max file size to store in VFS cache

+ Previously we limited the max file size to cache only on load, but not on save — so if a huge file is not read-only, saving it could easily overflow VFSContentStorage. Now all code paths into VFSContentStorage apply the same limit.

(cherry picked from commit 0a5e639f35ae41901a7312d25e2e33be49aa4e97)


(cherry picked from commit 61ba8143c82cf1f32c7ae8d1d82e00505b7f91b0)

IJ-CR-150186

GitOrigin-RevId: c8161c5b36cdcbe0b441b5e85b4268f42d9854a6
This commit is contained in:
Ruslan Cheremin
2024-11-19 21:53:45 +01:00
committed by intellij-monorepo-bot
parent 329fa5cf88
commit baad7297be
4 changed files with 64 additions and 39 deletions

View File

@@ -1,32 +1,36 @@
// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.openapi.vfs;
import com.intellij.openapi.application.ApplicationManager;
import com.intellij.openapi.fileTypes.FileType;
import com.intellij.openapi.util.io.FileUtilRt;
import com.intellij.openapi.vfs.limits.FileSizeLimit;
import org.jetbrains.annotations.TestOnly;
public final class PersistentFSConstants {
@SuppressWarnings("deprecation")
/** Max file size to cache inside VFS */
public static final long FILE_LENGTH_TO_CACHE_THRESHOLD = FileSizeLimit.getFileLengthToCacheThreshold();
/**
* always in range [0, {@link #FILE_LENGTH_TO_CACHE_THRESHOLD}]
* Must always be in range [0, {@link #FILE_LENGTH_TO_CACHE_THRESHOLD}]
* <p>
* Currently, this is always true, because
* <pre>FILE_LENGTH_TO_CACHE_THRESHOLD = ... = max(20Mb, userFileSizeLimit, userContentLoadLimit)</pre>
* but could be changed in the future, hence .min(...) here is to ensure that.
*/
private static int ourMaxIntellisenseFileSize = Math.min(FileUtilRt.getUserFileSizeLimit(), (int)FILE_LENGTH_TO_CACHE_THRESHOLD);
/**
* @deprecated Prefer using {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getIntellisenseLimit(FileType)}
*/
/** @deprecated Prefer using {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getIntellisenseLimit(String)} */
@SuppressWarnings("DeprecatedIsStillUsed")
@Deprecated()
public static int getMaxIntellisenseFileSize() {
return ourMaxIntellisenseFileSize;
}
@TestOnly
public static void setMaxIntellisenseFileSize(int sizeInBytes) {
if (!ApplicationManager.getApplication().isUnitTestMode()) {
throw new IllegalStateException("cannot change max setMaxIntellisenseFileSize while running");
throw new IllegalStateException("maxIntellisenseFileSize could be changed only in tests");
}
ourMaxIntellisenseFileSize = sizeInBytes;
}

View File

@@ -21,6 +21,7 @@ interface FileSizeLimit {
private val limitsByExtension: AtomicReference<Map<String, ExtensionSizeLimitInfo>?> = AtomicReference(null)
/** Max file size to cache inside VFS */
@JvmStatic
@ApiStatus.Internal
fun getFileLengthToCacheThreshold(): Int = FileUtilRt.LARGE_FOR_CONTENT_LOADING

View File

@@ -43,8 +43,8 @@ import com.intellij.util.containers.*;
import com.intellij.util.io.ReplicatorInputStream;
import io.opentelemetry.api.metrics.Meter;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.*;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenCustomHashSet;
import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet;
import org.jetbrains.annotations.*;
@@ -54,9 +54,9 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -154,6 +154,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
ShutDownTracker.getInstance().registerCacheShutdownTask(this::disconnect);
setupOTelMonitoring(TelemetryManager.getInstance().getMeter(PlatformScopesKt.VFS));
LOG.info("VFS.maxFileLengthToCache: " + PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD);
}
@ApiStatus.Internal
@@ -600,8 +602,6 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
}
if (child == null) {
//FIXME RC: inside makeChildRecord() there will be a fileRecordLock acquisition -- and recursive acquisition is
// impossible with StampedLock
child = makeChildRecord(parent, parentId, canonicalName, childData, fs, null);
foundChildRef.set(child);
return children.insert(child);
@@ -802,7 +802,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
byte[] content = fs.contentsToByteArray(file);
if (mayCacheContent && shouldCache(content.length)) {
if (mayCacheContent && shouldCacheFileContentInVFS(content.length)) {
updateContentForFile(fileId, new ByteArraySequence(content));
}
else {
@@ -894,7 +894,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
InputStream contentStream;
if (result.contentRecordId <= 0 || result.mustReloadContent) {
InputStream fileStream = fs.getInputStream(file);
if (shouldCache(result.actualFileLength)) {
if (shouldCacheFileContentInVFS(result.actualFileLength)) {
contentStream = createReplicatorAndStoreContent(file, fileStream, result.actualFileLength);
}
else {
@@ -908,11 +908,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
return contentStream;
}
private static boolean shouldCache(long len) {
if (len > PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD) {
return false;
}
return true;
private static boolean shouldCacheFileContentInVFS(long fileLength) {
return fileLength <= PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD;
}
private @NotNull InputStream createReplicatorAndStoreContent(@NotNull VirtualFile file,
@@ -1008,27 +1005,42 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
long oldLength = getLastRecordedLength(file);
VFileContentChangeEvent event = new VFileContentChangeEvent(
requestor, file, file.getModificationStamp(), modStamp, file.getTimeStamp(), -1, oldLength, count);
requestor, file, file.getModificationStamp(), modStamp, file.getTimeStamp(), -1, oldLength, count
);
List<VFileEvent> events = List.of(event);
fireBeforeEvents(getPublisher(), events);
NewVirtualFileSystem fs = getFileSystem(file);
// `FSRecords.ContentOutputStream` already buffered, no need to wrap in `BufferedStream`
try (OutputStream persistenceStream = writeContent(file, /*contentOfFixedSize: */ fs.isReadOnly())) {
persistenceStream.write(buf, 0, count);
try {
if (shouldCacheFileContentInVFS(count)) {
// `FSRecords.ContentOutputStream` is already buffered => no need to wrap in `BufferedStream`
try (OutputStream persistenceStream = writeContent(file, /*contentOfFixedSize: */ fs.isReadOnly())) {
persistenceStream.write(buf, 0, count);
}
}
else {
cleanPersistedContent(fileId(file));//so next turn content will be loaded from FS again
}
}
finally {
try (OutputStream ioFileStream = fs.getOutputStream(file, requestor, modStamp, timeStamp)) {
ioFileStream.write(buf, 0, count);
}
finally {
closed = true;
FileAttributes attributes = fs.getAttributes(file);
// due to FS rounding, the timestamp of the file can significantly differ from the current time
long newTimestamp = attributes != null ? attributes.lastModified : DEFAULT_TIMESTAMP;
long newLength = attributes != null ? attributes.length : DEFAULT_LENGTH;
executeTouch(file, false, event.getModificationStamp(), newLength, newTimestamp);
fireAfterEvents(getPublisher(), events);
}
writeToDisk(fs, event, events);
}
}
private void writeToDisk(@NotNull NewVirtualFileSystem fs,
@NotNull VFileContentChangeEvent event,
@NotNull List<VFileEvent> events) throws IOException {
try (OutputStream ioFileStream = fs.getOutputStream(file, requestor, modStamp, timeStamp)) {
ioFileStream.write(buf, 0, count);
}
finally {
closed = true;
FileAttributes attributes = fs.getAttributes(file);
// due to FS rounding, the timestamp of the file can significantly differ from the current time
long newTimestamp = attributes != null ? attributes.lastModified : DEFAULT_TIMESTAMP;
long newLength = attributes != null ? attributes.length : DEFAULT_LENGTH;
executeTouch(file, false, event.getModificationStamp(), newLength, newTimestamp);
fireAfterEvents(getPublisher(), events);
}
}
};
@@ -1529,7 +1541,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
if (!(vf instanceof VirtualDirectoryImpl)) {
return;
}
parent = (VirtualDirectoryImpl)vf; // retain in `myIdToDirCache` at least for the duration of this block, so that subsequent `findFileById` won't crash
parent =
(VirtualDirectoryImpl)vf; // retain in `myIdToDirCache` at least for the duration of this block, so that subsequent `findFileById` won't crash
NewVirtualFileSystem fs = getFileSystem(parent);
List<ChildInfo> childrenAdded = new ArrayList<>(createEvents.size());
@@ -1546,7 +1559,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
childrenAdded.sort(ChildInfo.BY_ID);
boolean caseSensitive = parent.isCaseSensitive();
vfsPeer.update(parent, parentId, oldChildren -> oldChildren.merge(vfsPeer, childrenAdded, caseSensitive));
parent.createAndAddChildren(childrenAdded, false, (__, ___) -> {});
parent.createAndAddChildren(childrenAdded, false, (__, ___) -> {
});
saveScannedChildrenRecursively(createEvents, fs, parent.isCaseSensitive());
}
@@ -2373,8 +2387,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
}
@TestOnly
public void cleanPersistedContent(int id) {
doCleanPersistedContent(id);
public void cleanPersistedContent(int fileId) {
doCleanPersistedContent(fileId);
}
@TestOnly
@@ -2394,6 +2408,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable {
private void doCleanPersistedContent(int id) {
vfsPeer.updateRecordFields(id, record -> {
//current .contentId, if any, should be ignored when MUST_RELOAD_CONTENT flag is set
return record.addFlags(Flags.MUST_RELOAD_CONTENT | Flags.MUST_RELOAD_LENGTH);
});
}

View File

@@ -30,6 +30,11 @@ public final class FileUtilRt {
public static final int MEGABYTE = KILOBYTE * KILOBYTE;
/**
* This threshold has a mixed semantics, it is used in at least 2 roles:
* 1. Max file size to store ('cache') in VFS (i.e. PersistentFSImpl)
* 2. Just 'big enough' file size, to switch from simple one-chunk loading to incremental loading, or not load it at all
* (e.g. {@linkplain #isTooLarge(long)}, JupyterFileUtils, JBZipEntry)
* TODO RC: those roles are different enough to split them into independent thresholds
* @deprecated Prefer using @link {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getContentLoadLimit}
*/
@SuppressWarnings("DeprecatedIsStillUsed")