From baad7297be7283ae427f5262abc320dac2a39ba2 Mon Sep 17 00:00:00 2001 From: Ruslan Cheremin Date: Tue, 19 Nov 2024 21:53:45 +0100 Subject: [PATCH] [vfs] PY-77389: limit max file size to store in VFS cache + previously we limited max file size to cache _on_load -- but not on save, so if huge file is !readOnly, its saving could easily overflow VFSContentStorage -> now all ways to VFSContentStorage have same limits (cherry picked from commit 0a5e639f35ae41901a7312d25e2e33be49aa4e97) (cherry picked from commit 61ba8143c82cf1f32c7ae8d1d82e00505b7f91b0) IJ-CR-150186 GitOrigin-RevId: c8161c5b36cdcbe0b441b5e85b4268f42d9854a6 --- .../openapi/vfs/PersistentFSConstants.java | 20 +++-- .../openapi/vfs/limits/FileSizeLimit.kt | 1 + .../newvfs/persistent/PersistentFSImpl.java | 77 +++++++++++-------- .../intellij/openapi/util/io/FileUtilRt.java | 5 ++ 4 files changed, 64 insertions(+), 39 deletions(-) diff --git a/platform/core-api/src/com/intellij/openapi/vfs/PersistentFSConstants.java b/platform/core-api/src/com/intellij/openapi/vfs/PersistentFSConstants.java index 24122795c64e..6b668b18cb16 100644 --- a/platform/core-api/src/com/intellij/openapi/vfs/PersistentFSConstants.java +++ b/platform/core-api/src/com/intellij/openapi/vfs/PersistentFSConstants.java @@ -1,32 +1,36 @@ -// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. 
package com.intellij.openapi.vfs; import com.intellij.openapi.application.ApplicationManager; -import com.intellij.openapi.fileTypes.FileType; import com.intellij.openapi.util.io.FileUtilRt; import com.intellij.openapi.vfs.limits.FileSizeLimit; +import org.jetbrains.annotations.TestOnly; public final class PersistentFSConstants { - @SuppressWarnings("deprecation") + + /** Max file size to cache inside VFS */ public static final long FILE_LENGTH_TO_CACHE_THRESHOLD = FileSizeLimit.getFileLengthToCacheThreshold(); /** - * always in range [0, {@link #FILE_LENGTH_TO_CACHE_THRESHOLD}] + * Must always be in range [0, {@link #FILE_LENGTH_TO_CACHE_THRESHOLD}] + *

+ * Currently, this is always true, because + *

FILE_LENGTH_TO_CACHE_THRESHOLD = ... = max(20Mb, userFileSizeLimit, userContentLoadLimit)
+ * but could be changed in the future, hence .min(...) here is to ensure that. */ private static int ourMaxIntellisenseFileSize = Math.min(FileUtilRt.getUserFileSizeLimit(), (int)FILE_LENGTH_TO_CACHE_THRESHOLD); - /** - * @deprecated Prefer using {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getIntellisenseLimit(FileType)} - */ + /** @deprecated Prefer using {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getIntellisenseLimit(String)} */ @SuppressWarnings("DeprecatedIsStillUsed") @Deprecated() public static int getMaxIntellisenseFileSize() { return ourMaxIntellisenseFileSize; } + @TestOnly public static void setMaxIntellisenseFileSize(int sizeInBytes) { if (!ApplicationManager.getApplication().isUnitTestMode()) { - throw new IllegalStateException("cannot change max setMaxIntellisenseFileSize while running"); + throw new IllegalStateException("maxIntellisenseFileSize could be changed only in tests"); } ourMaxIntellisenseFileSize = sizeInBytes; } diff --git a/platform/core-api/src/com/intellij/openapi/vfs/limits/FileSizeLimit.kt b/platform/core-api/src/com/intellij/openapi/vfs/limits/FileSizeLimit.kt index 753d0863b442..981c9cec76c1 100644 --- a/platform/core-api/src/com/intellij/openapi/vfs/limits/FileSizeLimit.kt +++ b/platform/core-api/src/com/intellij/openapi/vfs/limits/FileSizeLimit.kt @@ -21,6 +21,7 @@ interface FileSizeLimit { private val limitsByExtension: AtomicReference?> = AtomicReference(null) + /** Max file size to cache inside VFS */ @JvmStatic @ApiStatus.Internal fun getFileLengthToCacheThreshold(): Int = FileUtilRt.LARGE_FOR_CONTENT_LOADING diff --git a/platform/platform-impl/src/com/intellij/openapi/vfs/newvfs/persistent/PersistentFSImpl.java b/platform/platform-impl/src/com/intellij/openapi/vfs/newvfs/persistent/PersistentFSImpl.java index 5c4a002e03f5..06af21288aa0 100644 --- a/platform/platform-impl/src/com/intellij/openapi/vfs/newvfs/persistent/PersistentFSImpl.java +++ 
b/platform/platform-impl/src/com/intellij/openapi/vfs/newvfs/persistent/PersistentFSImpl.java @@ -43,8 +43,8 @@ import com.intellij.util.containers.*; import com.intellij.util.io.ReplicatorInputStream; import io.opentelemetry.api.metrics.Meter; import it.unimi.dsi.fastutil.Hash; -import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.*; +import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenCustomHashSet; import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet; import org.jetbrains.annotations.*; @@ -54,9 +54,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.StandardCharsets; +import java.util.*; import java.util.HashMap; import java.util.HashSet; -import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -154,6 +154,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { ShutDownTracker.getInstance().registerCacheShutdownTask(this::disconnect); setupOTelMonitoring(TelemetryManager.getInstance().getMeter(PlatformScopesKt.VFS)); + + LOG.info("VFS.maxFileLengthToCache: " + PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD); } @ApiStatus.Internal @@ -600,8 +602,6 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { } if (child == null) { - //FIXME RC: inside makeChildRecord() there will be a fileRecordLock acquisition -- and recursive acquisition is - // impossible with StampedLock child = makeChildRecord(parent, parentId, canonicalName, childData, fs, null); foundChildRef.set(child); return children.insert(child); @@ -802,7 +802,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { byte[] content = fs.contentsToByteArray(file); - if (mayCacheContent && shouldCache(content.length)) { + if (mayCacheContent && 
shouldCacheFileContentInVFS(content.length)) { updateContentForFile(fileId, new ByteArraySequence(content)); } else { @@ -894,7 +894,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { InputStream contentStream; if (result.contentRecordId <= 0 || result.mustReloadContent) { InputStream fileStream = fs.getInputStream(file); - if (shouldCache(result.actualFileLength)) { + if (shouldCacheFileContentInVFS(result.actualFileLength)) { contentStream = createReplicatorAndStoreContent(file, fileStream, result.actualFileLength); } else { @@ -908,11 +908,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { return contentStream; } - private static boolean shouldCache(long len) { - if (len > PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD) { - return false; - } - return true; + private static boolean shouldCacheFileContentInVFS(long fileLength) { + return fileLength <= PersistentFSConstants.FILE_LENGTH_TO_CACHE_THRESHOLD; } private @NotNull InputStream createReplicatorAndStoreContent(@NotNull VirtualFile file, @@ -1008,27 +1005,42 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { long oldLength = getLastRecordedLength(file); VFileContentChangeEvent event = new VFileContentChangeEvent( - requestor, file, file.getModificationStamp(), modStamp, file.getTimeStamp(), -1, oldLength, count); + requestor, file, file.getModificationStamp(), modStamp, file.getTimeStamp(), -1, oldLength, count + ); List events = List.of(event); fireBeforeEvents(getPublisher(), events); + NewVirtualFileSystem fs = getFileSystem(file); - // `FSRecords.ContentOutputStream` already buffered, no need to wrap in `BufferedStream` - try (OutputStream persistenceStream = writeContent(file, /*contentOfFixedSize: */ fs.isReadOnly())) { - persistenceStream.write(buf, 0, count); + try { + if (shouldCacheFileContentInVFS(count)) { + // `FSRecords.ContentOutputStream` is already buffered => no need to wrap in 
`BufferedStream` + try (OutputStream persistenceStream = writeContent(file, /*contentOfFixedSize: */ fs.isReadOnly())) { + persistenceStream.write(buf, 0, count); + } + } + else { + cleanPersistedContent(fileId(file));//so next turn content will be loaded from FS again + } } finally { - try (OutputStream ioFileStream = fs.getOutputStream(file, requestor, modStamp, timeStamp)) { - ioFileStream.write(buf, 0, count); - } - finally { - closed = true; - FileAttributes attributes = fs.getAttributes(file); - // due to FS rounding, the timestamp of the file can significantly differ from the current time - long newTimestamp = attributes != null ? attributes.lastModified : DEFAULT_TIMESTAMP; - long newLength = attributes != null ? attributes.length : DEFAULT_LENGTH; - executeTouch(file, false, event.getModificationStamp(), newLength, newTimestamp); - fireAfterEvents(getPublisher(), events); - } + writeToDisk(fs, event, events); + } + } + + private void writeToDisk(@NotNull NewVirtualFileSystem fs, + @NotNull VFileContentChangeEvent event, + @NotNull List events) throws IOException { + try (OutputStream ioFileStream = fs.getOutputStream(file, requestor, modStamp, timeStamp)) { + ioFileStream.write(buf, 0, count); + } + finally { + closed = true; + FileAttributes attributes = fs.getAttributes(file); + // due to FS rounding, the timestamp of the file can significantly differ from the current time + long newTimestamp = attributes != null ? attributes.lastModified : DEFAULT_TIMESTAMP; + long newLength = attributes != null ? 
attributes.length : DEFAULT_LENGTH; + executeTouch(file, false, event.getModificationStamp(), newLength, newTimestamp); + fireAfterEvents(getPublisher(), events); } } }; @@ -1529,7 +1541,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { if (!(vf instanceof VirtualDirectoryImpl)) { return; } - parent = (VirtualDirectoryImpl)vf; // retain in `myIdToDirCache` at least for the duration of this block, so that subsequent `findFileById` won't crash + parent = + (VirtualDirectoryImpl)vf; // retain in `myIdToDirCache` at least for the duration of this block, so that subsequent `findFileById` won't crash NewVirtualFileSystem fs = getFileSystem(parent); List childrenAdded = new ArrayList<>(createEvents.size()); @@ -1546,7 +1559,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { childrenAdded.sort(ChildInfo.BY_ID); boolean caseSensitive = parent.isCaseSensitive(); vfsPeer.update(parent, parentId, oldChildren -> oldChildren.merge(vfsPeer, childrenAdded, caseSensitive)); - parent.createAndAddChildren(childrenAdded, false, (__, ___) -> {}); + parent.createAndAddChildren(childrenAdded, false, (__, ___) -> { + }); saveScannedChildrenRecursively(createEvents, fs, parent.isCaseSensitive()); } @@ -2373,8 +2387,8 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { } @TestOnly - public void cleanPersistedContent(int id) { - doCleanPersistedContent(id); + public void cleanPersistedContent(int fileId) { + doCleanPersistedContent(fileId); } @TestOnly @@ -2394,6 +2408,7 @@ public final class PersistentFSImpl extends PersistentFS implements Disposable { private void doCleanPersistedContent(int id) { vfsPeer.updateRecordFields(id, record -> { + //current .contentId, if any, should be ignored when MUST_RELOAD_CONTENT flag is set return record.addFlags(Flags.MUST_RELOAD_CONTENT | Flags.MUST_RELOAD_LENGTH); }); } diff --git a/platform/util-rt/src/com/intellij/openapi/util/io/FileUtilRt.java 
b/platform/util-rt/src/com/intellij/openapi/util/io/FileUtilRt.java index 62369d885b94..b59ce81f4d23 100644 --- a/platform/util-rt/src/com/intellij/openapi/util/io/FileUtilRt.java +++ b/platform/util-rt/src/com/intellij/openapi/util/io/FileUtilRt.java @@ -30,6 +30,11 @@ public final class FileUtilRt { public static final int MEGABYTE = KILOBYTE * KILOBYTE; /** + * This threshold has mixed semantics; it is used in at least 2 roles: + * 1. Max file size to store ('cache') in VFS (i.e. PersistentFSImpl) + * 2. Just 'big enough' file size, to switch from simple one-chunk loading to incremental loading, or not load it at all + * (e.g. {@linkplain #isTooLarge(long)}, JupyterFileUtils, JBZipEntry) + * TODO RC: those roles are different enough to split them into independent thresholds * @deprecated Prefer using @link {@link com.intellij.openapi.vfs.limits.FileSizeLimit#getContentLoadLimit} */ @SuppressWarnings("DeprecatedIsStillUsed")