PY-50751: Custom rsync for WSL.

``rsync`` is slow because every file system access call from WSL to Windows and vice versa is slow.
We need to reduce the number of such calls.

This approach does the following:
* Calculates hashes on Windows side using Intellij process
* Calculates hashes on WSL side using native Linux process
* Compares them searching for differences
* Splits the files that differ into groups, tars each group and copies it to the other side.

From what I checked this is the fastest approach to sync WSL and Windows.

* How to use *

`WslSyncAction` is a temporary internal only action. Run it and try to sync some folder.

`WslSync` is a class with a static method you can use to sync two folders.

GitOrigin-RevId: 6c63329d73ff32b7ee5e4436bfde39e72fe35a44
This commit is contained in:
Ilya.Kazakevich
2022-02-05 00:56:54 +03:00
committed by intellij-monorepo-bot
parent dd124d846d
commit e23b3dcbc9
26 changed files with 6387 additions and 42 deletions

View File

@@ -1,12 +1,12 @@
<component name="libraryTable">
<library name="lz4-java" type="repository">
<properties maven-id="org.lz4:lz4-pure-java:1.8.0" />
<properties maven-id="org.lz4:lz4-java:1.8.0" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/lz4/lz4-pure-java/1.8.0/lz4-pure-java-1.8.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lz4/lz4-java/1.8.0/lz4-java-1.8.0.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/lz4/lz4-pure-java/1.8.0/lz4-pure-java-1.8.0-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/lz4/lz4-java/1.8.0/lz4-java-1.8.0-sources.jar!/" />
</SOURCES>
</library>
</component>

BIN
bin/win/wslhash Normal file

Binary file not shown.

View File

@@ -1,3 +1,4 @@
musl
*.tgz
wsploxy
wsploxy
wslhash

View File

@@ -10,7 +10,7 @@ CFLAGS = -Wall -Wextra -pedantic -Werror -Os -D_POSIX_SOURCE=1 -D_BSD_SOURCE=1
LDLIBS = -static
CC = $(MUSL_CC)
all: $(MUSL_CC) wslproxy
all: $(MUSL_CC) wslproxy wslhash
musl:
@echo I will now download musl. If it fails, check you have wget and see README
@@ -21,8 +21,9 @@ $(MUSL_CC): musl
cd $(MUSL_DISTR) && ./configure --prefix=$(MUSL_HOME) --syslibdir=$(MUSL_HOME)/lib && $(MAKE) && $(MAKE) install
wslproxy: $(SOURCES) | $(MUSL_CC)
wslhash: wslhash.c xxhash.h xxhash.c | $(MUSL_CC)
.PHONY: all clean
clean:
$(RM) wslproxy
$(RM) wslproxy wslhash

View File

@@ -6,6 +6,11 @@ This proxy accepts two clients: one for egress (eth0) and one for ingress (loopb
It then reports IP and port via stdout
EOF (close stream) written to the stdin kills process.
wslhash
Calculates hashes for all files in certain folder to implement custom rsync-like functionality. `rsync` may be missing on some WSL distros,
and also it may be slow: access from WSL to Windows takes a lot of time.
This tool runs on WSL only, so it is fast. See WslSync.kt
To build the tool, use the Makefile. We link it statically because WSL may lack glibc. The kernel ABI is backward compatible, so build it on some old Linux
We use musl libc: https://musl.libc.org/

141
native/WslTools/wslhash.c Normal file
View File

@@ -0,0 +1,141 @@
#define XXH_VECTOR XXH_SSE2
#define XXH_STATIC_LINKING_ONLY 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "xxhash.h"
#include <ftw.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <limits.h>
#include <errno.h>
#include <locale.h>
#include <langinfo.h>
// Usage: wslhash folder [ext1 ext2 ..]
// Calculates hashes for all regular files in this folder (optionally limited to the given extensions).
// Output (stdout) is a sequence of records with nothing between them: file:[hash]file:[hash]...
// "file" is the path relative to the folder, [hash] is the raw 8-byte XXH64 value written as-is from memory
// (little endian on x86, which is what the reader on the Java side expects), so the stream is binary, not text.
static size_t g_dir_len = 0; // Length of the resolved (realpath) root dir string, used to cut the root prefix off reported paths
static char **g_exts; // list of extensions (points into argv) or NULL if no filter needed
static int g_num_of_exts = 0; // number of extensions in g_exts, 0 means "do not filter"
static const char empty[sizeof(XXH64_hash_t)]; // All-zero "hash" written for empty files (static storage is zero-initialized)
// Checks whether a file passes the extension filter or should be skipped.
// Returns 1 when no filter is configured, when the file name has no extension, or when the text after the
// last '.' of the file name equals one of g_exts (case-sensitive); returns 0 otherwise.
// NOTE(review): extension-less files are accepted here even when a filter is set, while the Windows-side visitor
// and the find(1)-based listing skip them - confirm which behaviour is intended.
static int file_extension_ok(const char *file) {
    if (g_num_of_exts == 0) return 1;
    // Only the last path component may carry the extension: a dot in a directory name
    // ("my.dir/Makefile") must not be mistaken for an extension separator
    const char *slash = strrchr(file, '/');
    const char *name = slash ? slash + 1 : file;
    const char *dot = strrchr(name, '.');
    if (!dot) return 1; // No extension
    for (int i = 0; i < g_num_of_exts; i++) {
        if (strcmp(dot + 1, g_exts[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
// Called by nftw() on each visited entry.
// For every regular file that passes the extension filter writes "relative_path:" followed by the raw
// 8-byte XXH64 hash (seed 0) of its content to stdout. Empty files are not opened: an all-zero hash is written.
// Returns 0 so the walk always continues; terminates the process with exit code 2 when the file can't be
// opened and 3 when it can't be mapped.
static int
process_file(const char *fpath, const struct stat *sb, int tflag, __attribute__((unused)) struct FTW *ftwbuf) {
    if (tflag != FTW_F) {
        return 0; // Not a file
    }
    if (!file_extension_ok(fpath)) {
        return 0; // File has wrong extension, skip
    }
    const char *file_name = fpath + g_dir_len + 1; // remove root dir and the "/" that follows it
    printf("%s:", file_name);
    if (sb->st_size == 0) {
        // No need to calculate hash for empty file
        fwrite(empty, sizeof(empty), 1, stdout);
        return 0;
    }
    const int fd = open(fpath, O_RDONLY);
    if (fd == -1) {
        fprintf(stderr, "Can't open file %s", fpath);
        perror("Can't open file");
        exit(2);
    }
    // Mmap file and calculate hash
    char *buffer = mmap(NULL, sb->st_size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0);
    if (buffer == MAP_FAILED) {
        // Must be checked before madvise(): advising on MAP_FAILED fails itself and overwrites errno,
        // so perror() would report the wrong reason
        fprintf(stderr, "Can't mmap file %s", fpath);
        perror("Can't mmap file");
        exit(3);
    }
    madvise(buffer, sb->st_size, MADV_SEQUENTIAL); // Just a hint: result is deliberately ignored
    XXH64_hash_t hash = XXH64(buffer, sb->st_size, 0);
    fwrite(&hash, sizeof(XXH64_hash_t), 1, stdout);
    munmap(buffer, sb->st_size);
    close(fd);
    return 0;
}
// Makes sure the charset of the current locale can be decoded by the reader of our output.
// Returns 1 for UTF-8 and ASCII; for any other charset reports it to stderr and returns 0.
static int ensure_charset() {
    setlocale(LC_CTYPE, "");
    const char *codeset = nl_langinfo(CODESET);
    // Java side decodes output as UTF-8 and almost all WSL distros use UTF
    const int is_utf8 = strcmp(codeset, "UTF-8") == 0;
    // ASCII is 7 bit, so english texts could be decoded by java either
    const int is_ascii = strcmp(codeset, "ASCII") == 0;
    if (!is_utf8 && !is_ascii) {
        // Other charsets aren't used nor supported by WSL
        fprintf(stderr, "Please use UTF-8 locale, not %s", codeset);
        return 0;
    }
    return 1;
}
// Entry point: wslhash folder [ext1 ext2 ..]
// Validates the locale and the folder argument, remembers the optional extension filter and walks the folder
// with nftw(), which reports every file through process_file().
// Exit codes: 0 success, -1 unsupported charset or realpath() failure, 1 no path given,
// 2 path can't be stat'ed or is not a directory, 3 nftw() failure.
int main(int argc, char *argv[]) {
    if (!ensure_charset()) {
        return -1;
    }
    if (argc < 2) {
        fprintf(stderr, "No path provided");
        return 1;
    }
    char *root_dir = argv[1];
    struct stat path_stat;
    if (stat(root_dir, &path_stat) != 0) {
        // Without this check path_stat would be read uninitialized when the path doesn't exist
        perror("Can't stat provided path");
        return 2;
    }
    if (!S_ISDIR(path_stat.st_mode)) {
        fprintf(stderr, "Provided path is not dir\n");
        return 2;
    }
    char root_dir_clean[PATH_MAX];
    if (realpath(root_dir, root_dir_clean) == NULL) {
        fprintf(stderr, "realpath failed: %d", errno);
        return -1;
    }
    g_dir_len = strlen(root_dir_clean);

    const int args_before_exts = 2;
    if (argc > args_before_exts) { // Extensions are provided: store argc+argv
        g_exts = argv + args_before_exts;
        g_num_of_exts = argc - args_before_exts;
    }
    // number of file descriptors is more or less random taken from example
    // we don't know how many descriptors are available on the particular WSL, but sure not less than 20
    if (nftw(root_dir_clean, process_file, 20, FTW_MOUNT) == -1) { // Walk through files, see nftw(3)
        perror("nftw failed");
        return 3;
    }
    return 0;
}

43
native/WslTools/xxhash.c Normal file
View File

@@ -0,0 +1,43 @@
/*
* xxHash - Extremely Fast Hash algorithm
* Copyright (C) 2012-2020 Yann Collet
*
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at:
* - xxHash homepage: https://www.xxhash.com
* - xxHash source repository: https://github.com/Cyan4973/xxHash
*/
/*
* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

5445
native/WslTools/xxhash.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
<idea-plugin>
<actions>
<action class="com.intellij.execution.wsl.target.WslSyncAction" internal="true"/>
<action id="TerminalIncreaseFontSize" class="com.intellij.openapi.editor.actions.TerminalChangeFontSizeAction$IncreaseEditorFontSize"
use-shortcut-of="EditorIncreaseFontSize"/>
<action id="TerminalDecreaseFontSize" class="com.intellij.openapi.editor.actions.TerminalChangeFontSizeAction$DecreaseEditorFontSize"

View File

@@ -0,0 +1,68 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.target
import com.intellij.execution.wsl.WslDistributionManager
import com.intellij.execution.wsl.sync.WslSync
import com.intellij.openapi.actionSystem.AnActionEvent
import com.intellij.openapi.diagnostic.logger
import com.intellij.openapi.progress.ProgressIndicator
import com.intellij.openapi.progress.ProgressManager
import com.intellij.openapi.progress.Task
import com.intellij.openapi.project.DumbAwareAction
import com.intellij.util.TimeoutUtil
import java.nio.file.Path
import javax.swing.JOptionPane
/**
 * Demonstrates how [WslSync.syncWslFolders] works.
 *
 * Asks (with a chain of Swing dialogs) for a distro, a sync direction, a Linux folder, a Windows folder and an
 * optional comma separated list of extensions, then runs the sync under a modal progress and shows how long it took.
 * Cancelling any of the dialogs aborts the action.
 */
@Suppress("HardCodedStringLiteral") // This is a test, internal only action
class WslSyncAction : DumbAwareAction("WSL Sync") {
  override fun actionPerformed(e: AnActionEvent) {
    val progressManager = ProgressManager.getInstance()
    val distroNames = ArrayList<String>()
    progressManager.run(object : Task.Modal(e.project, "Getting List of Distros", false) {
      override fun run(indicator: ProgressIndicator) {
        distroNames += WslDistributionManager.getInstance().installedDistributions.map { it.presentableName }
      }
    })
    if (distroNames.isEmpty()) {
      throw Exception("Please install WSL")
    }

    // showInputDialog returns null when the dialog is cancelled or closed: stop instead of failing with NPE later
    val distroName = JOptionPane.showInputDialog(null, "Choose distro", "Distro", JOptionPane.QUESTION_MESSAGE, null, distroNames.toArray(),
                                                 distroNames[0]) ?: return

    val directions = arrayOf("Linux -> Windows", "Windows -> Linux")
    val linToWin = directions[0]
    val direction = JOptionPane.showInputDialog(null, "Choose direction", "Direction", JOptionPane.QUESTION_MESSAGE, null, directions,
                                                linToWin) ?: return

    val linux = JOptionPane.showInputDialog("Linux path", "/home/link/huge_folder") ?: return
    val windows = Path.of(JOptionPane.showInputDialog("Windows path", "c:\\temp\\huge_folder") ?: return)
    val extensionsStr = JOptionPane.showInputDialog("Comma separated extensions (or nothing for all)", "py,pyi") ?: return
    // "py, pyi" must work as well as "py,pyi"; blank input means "no filter"
    val extensions = extensionsStr.split(',').map { it.trim() }.filter { it.isNotEmpty() }

    progressManager.run(object : Task.Modal(e.project, "Syncing Folders..", false) {
      // NOTE(review): the dialogs below are shown from the progress task, presumably off the EDT - confirm this is
      // acceptable for an internal action
      override fun run(indicator: ProgressIndicator) {
        val distro = WslDistributionManager.getInstance().installedDistributions.first { it.presentableName == distroName }
        try {
          val time = TimeoutUtil.measureExecutionTime<Exception> {
            WslSync.syncWslFolders(linux, windows, distro, direction == linToWin, extensions.toTypedArray())
          }
          val message = "Synced in $time"
          JOptionPane.showMessageDialog(null, message)
          logger<WslSyncAction>().warn(message)
        }
        catch (syncError: Exception) {
          JOptionPane.showMessageDialog(null, "Error running sync. Check logs", "Error", JOptionPane.ERROR_MESSAGE)
          logger<WslSyncAction>().warn(syncError)
        }
      }
    })
  }
}

View File

@@ -3,7 +3,10 @@ package com.intellij.execution.wsl
import com.intellij.execution.ExecutionException
import com.intellij.execution.configurations.GeneralCommandLine
import com.intellij.openapi.project.Project
import com.intellij.openapi.util.NlsSafe
import com.intellij.openapi.vfs.VirtualFile
import org.jetbrains.annotations.ApiStatus
/**
* This is a temporary interface used for wsl classes transition. Please, do not use it
@@ -16,9 +19,40 @@ interface AbstractWslDistribution {
fun getWslPath(windowsPath: String): String?
/**
* @return creates and patches command line, e.g:
* `ruby -v` => `bash -c "ruby -v"`
*/
* Patches passed command line to make it runnable in WSL context, e.g changes `date` to `ubuntu run "date"`.
*
*
*
*
* Environment variables and working directory are mapped to the chain calls: working dir using `cd` and environment variables using `export`,
* e.g `bash -c "export var1=val1 && export var2=val2 && cd /some/working/dir && date"`.
*
*
*
*
* Method should properly handle quotation and escaping of the environment variables.
*
*
*
* @param commandLine command line to patch
* @param project current project
* @param options [WSLCommandLineOptions] instance
* @param <T> GeneralCommandLine or descendant
* @return original `commandLine`, prepared to run in WSL context
</T> */
@Throws(ExecutionException::class)
fun createWslCommandLine(vararg command: String): GeneralCommandLine
fun <T : GeneralCommandLine> patchCommandLine(commandLine: T,
project: Project?,
options: WSLCommandLineOptions): T
/**
* @return UNC root for the distribution, e.g. `\\wsl$\Ubuntu`
* @implNote there is a hack in [LocalFileSystemBase.getAttributes] which causes all network
* virtual files to exists all the time. So we need to check explicitly that root exists. After implementing proper non-blocking check
* for the network resource availability, this method may be simplified to findFileByIoFile
* @see VfsUtil.findFileByIoFile
*/
@ApiStatus.Experimental
fun getUNCRootVirtualFile(refreshIfNeed: Boolean): VirtualFile?
}

View File

@@ -21,7 +21,6 @@ import com.intellij.openapi.util.text.StringUtil;
import com.intellij.openapi.util.text.Strings;
import com.intellij.openapi.vfs.VfsUtil;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.impl.local.LocalFileSystemBase;
import com.intellij.openapi.vfs.impl.wsl.WslConstants;
import com.intellij.util.Consumer;
import com.intellij.util.Functions;
@@ -135,11 +134,6 @@ public class WSLDistribution implements AbstractWslDistribution {
return myVersion;
}
@Override
public @NotNull GeneralCommandLine createWslCommandLine(String @NotNull ... command) throws ExecutionException {
return patchCommandLine(new GeneralCommandLine(command), null, new WSLCommandLineOptions());
}
/**
* Creates a patched command line, executes it on wsl distribution and returns output
*
@@ -234,20 +228,7 @@ public class WSLDistribution implements AbstractWslDistribution {
}
}
/**
* Patches passed command line to make it runnable in WSL context, e.g changes {@code date} to {@code ubuntu run "date"}.<p/>
* <p>
* Environment variables and working directory are mapped to the chain calls: working dir using {@code cd} and environment variables using {@code export},
* e.g {@code bash -c "export var1=val1 && export var2=val2 && cd /some/working/dir && date"}.<p/>
* <p>
* Method should properly handle quotation and escaping of the environment variables.<p/>
*
* @param commandLine command line to patch
* @param project current project
* @param options {@link WSLCommandLineOptions} instance
* @param <T> GeneralCommandLine or descendant
* @return original {@code commandLine}, prepared to run in WSL context
*/
@Override
public @NotNull <T extends GeneralCommandLine> T patchCommandLine(@NotNull T commandLine,
@Nullable Project project,
@NotNull WSLCommandLineOptions options) throws ExecutionException {
@@ -596,13 +577,7 @@ public class WSLDistribution implements AbstractWslDistribution {
return Paths.get(WslConstants.UNC_PREFIX + myDescriptor.getMsId());
}
/**
* @return UNC root for the distribution, e.g. {@code \\wsl$\Ubuntu}
* @implNote there is a hack in {@link LocalFileSystemBase#getAttributes(VirtualFile)} which causes all network
* virtual files to exists all the time. So we need to check explicitly that root exists. After implementing proper non-blocking check
* for the network resource availability, this method may be simplified to findFileByIoFile
* @see VfsUtil#findFileByIoFile(File, boolean)
*/
@Override
@ApiStatus.Experimental
public @Nullable VirtualFile getUNCRootVirtualFile(boolean refreshIfNeed) {
if (!Experiments.getInstance().isFeatureEnabled("wsl.p9.support")) {

View File

@@ -66,5 +66,6 @@
<orderEntry type="library" scope="TEST" name="JUnit5" level="project" />
<orderEntry type="module" module-name="intellij.platform.util.zip" scope="TEST" />
<orderEntry type="module" module-name="intellij.platform.util.rt.java8" />
<orderEntry type="module" module-name="intellij.platform.wsl.impl" scope="TEST" />
</component>
</module>

View File

@@ -11,7 +11,7 @@ import org.junit.rules.ExternalResource
* Depends on [com.intellij.testFramework.fixtures.TestFixtureRule], so make sure enable it before this class
* @see WslTestBase
*/
internal class WslRule : ExternalResource() {
class WslRule : ExternalResource() {
lateinit var wsl: WSLDistribution
private set

View File

@@ -0,0 +1,153 @@
// Copyright 2000-2021 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
package com.intellij.execution.wsl
import com.intellij.execution.wsl.sync.WslSync
import com.intellij.testFramework.rules.TempDirectory
import com.intellij.util.io.*
import org.junit.Assert
import org.junit.Rule
import org.junit.Test
import org.junit.rules.ExternalResource
import org.junit.rules.Timeout
import org.junit.runner.RunWith
import org.junit.runners.Parameterized
import org.junit.runners.Parameterized.Parameters
import java.nio.file.Path
import java.nio.file.attribute.FileTime
import java.util.concurrent.TimeUnit
import kotlin.io.path.exists
import kotlin.io.path.writeBytes
import kotlin.io.path.writeText
/**
 * JUnit rule that provides a temporary directory created inside the distro of [wslRule].
 * The directory is created with `mktemp -d` before the test and removed with `rm -rf` after it.
 */
class LinuxTemp(private val wslRule: WslRule) : ExternalResource() {
  /** Output of `mktemp -d`, i.e. the path of the created directory. Assigned in [before]. */
  lateinit var dir: String
    private set

  override fun before() {
    val distro = wslRule.wsl
    dir = distro.runCommand("mktemp", "-d")
  }

  override fun after() {
    val distro = wslRule.wsl
    distro.runCommand("rm", "-rf", dir)
  }
}
/**
 * Tests for [WslSync.syncWslFolders]. Each test runs twice: with [linToWin] `true` the Linux folder is the
 * source and the Windows folder is the destination, with `false` it is the other way round.
 */
@RunWith(Parameterized::class)
class WslSyncTest(private val linToWin: Boolean) : WslTestBase() {
  companion object {
    @Parameters
    @JvmStatic
    fun data(): Collection<Array<Boolean>> = listOf(arrayOf(true), arrayOf(false))
  }

  @JvmField
  @Rule
  val winDirRule = TempDirectory()

  @JvmField
  @Rule
  val linuxDirRule = LinuxTemp(wslRule)

  @JvmField
  @Rule
  val timeoutRule = Timeout(30, TimeUnit.SECONDS)

  /** The Linux temp dir addressed from Windows through the UNC root of the distro */
  private val linuxDirAsPath: Path get() = wslRule.wsl.getUNCRootVirtualFile(true)!!.toNioPath().resolve(linuxDirRule.dir)

  /**
   * Source has `file.txt` and the same name in a different case is touched; the modification time
   * of the source file must stay the same after the sync.
   */
  @Test
  fun syncDifferentRegister() {
    val win = winDirRule.newDirectoryPath()
    val sourceDir = if (linToWin) linuxDirAsPath else win
    val newFile = sourceDir.resolve("file.txt").createFile()
    val lastModified = newFile.lastModified()
    if (linToWin) {
      wsl.executeOnWsl(1000, "touch", "${linuxDirRule.dir}/File.txt")
    }
    else {
      val file = sourceDir.resolve("file.txt")
      val data = file.readBytes()
      file.writeBytes(data)
    }
    WslSync.syncWslFolders(linuxDirRule.dir, win, wslRule.wsl, linToWin)
    // JUnit signature is assertEquals(expected, actual): keep the order so that failure messages are not misleading
    Assert.assertEquals(lastModified, sourceDir.resolve(if (linToWin) "file.txt" else "File.txt").lastModified())
  }

  /** Syncing two empty folders must leave both of them empty */
  @Test
  fun syncEmptyFolder() {
    val windowsDir = winDirRule.newDirectoryPath()
    WslSync.syncWslFolders(linuxDirRule.dir, windowsDir, wslRule.wsl, linToWin)
    Assert.assertTrue(windowsDir.toFile().list()!!.isEmpty())
    Assert.assertTrue(linuxDirAsPath.toFile().list()!!.isEmpty())
  }

  /**
   * Full sync of [numberOfFiles] files with non-ASCII names, then every [modifyEachFile]-th source file is changed
   * and the folders are synced again: only changed files must be rewritten on the destination side.
   */
  @Test
  fun syncFullThenChange() {
    val numberOfFiles = 100
    val modifyEachFile = 3

    val windowsDir = winDirRule.newDirectoryPath()
    // `until`, not `..`: the inclusive range would create numberOfFiles + 1 files
    val fileNames = (0 until numberOfFiles).map { "$it-по-русски.txt" }
    val sourceDir = if (linToWin) linuxDirAsPath else windowsDir
    val destDir = if (linToWin) windowsDir else linuxDirAsPath

    for (fileName in fileNames) {
      sourceDir.resolve(fileName).writeText("hello $fileName")
    }
    WslSync.syncWslFolders(linuxDirRule.dir, windowsDir, wslRule.wsl, linToWin)

    val modificationTimes = mutableMapOf<Path, FileTime>()
    for (fileName in fileNames) {
      val file = destDir.resolve(fileName)
      Assert.assertTrue("File hasn't been copied", file.exists())
      Assert.assertEquals("Copied with wrong content", "hello $fileName", file.readText())
      modificationTimes[file] = file.lastModified()
    }
    Assert.assertEquals(fileNames.size, destDir.toFile().list()!!.size)

    Thread.sleep(1000) // To check modification time

    // Set: it is used for `in` checks below
    val fileIdsToModify = fileNames.indices.filter { it % modifyEachFile == 0 }.toSet()
    for (fileId in fileIdsToModify) {
      sourceDir.resolve(fileNames[fileId]).writeText("Modified")
    }
    WslSync.syncWslFolders(linuxDirRule.dir, windowsDir, wslRule.wsl, linToWin)

    for ((id, fileName) in fileNames.withIndex()) {
      val file = destDir.resolve(fileName)
      if (id in fileIdsToModify) {
        Assert.assertEquals("File not copied", "Modified", file.readText())
        Assert.assertNotEquals("File not modified: $file", modificationTimes[file], file.lastModified())
      }
      else {
        Assert.assertEquals("Content broken", "hello $fileName", file.readText())
        Assert.assertEquals("Wrong file modified: $file", modificationTimes[file], file.lastModified())
      }
    }
  }

  /** A file removed from the source must be removed from the destination by the next sync */
  @Test
  fun removeFiles() {
    val windowsDir = winDirRule.newDirectoryPath()
    val sourceDir = if (linToWin) linuxDirAsPath else windowsDir
    val destDir = if (linToWin) windowsDir else linuxDirAsPath

    for (i in (1..5)) {
      sourceDir.resolve("file$i.txt").createFile().writeText("test")
    }
    WslSync.syncWslFolders(linuxDirRule.dir, windowsDir, wslRule.wsl, linToWin)
    Assert.assertTrue("File hasn't been copied", destDir.resolve("file2.txt").exists())

    sourceDir.resolve("file2.txt").delete()
    WslSync.syncWslFolders(linuxDirRule.dir, windowsDir, wslRule.wsl, linToWin)
    Assert.assertFalse("File hasn't been deleted", destDir.resolve("file2.txt").exists())
  }
}

View File

@@ -9,7 +9,7 @@ import org.junit.rules.RuleChain
open class WslTestBase {
private val testFixtureRule = TestFixtureRule()
private val wslRule = WslRule()
internal val wslRule = WslRule()
protected val wsl: WSLDistribution get() = wslRule.wsl
protected val testFixture: BareTestFixture get() = testFixtureRule.fixture

View File

@@ -51,5 +51,8 @@
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module" module-name="intellij.platform.core" />
<orderEntry type="library" name="lz4-java" level="project" />
<orderEntry type="module" module-name="intellij.platform.util.ex" />
</component>
</module>

View File

@@ -0,0 +1,33 @@
package com.intellij.execution.wsl
import com.intellij.execution.configurations.GeneralCommandLine
import java.util.concurrent.CompletableFuture
/**
 * Creates [GeneralCommandLine] from [commands] and patches it with [AbstractWslDistribution.patchCommandLine]
 * using no project and default [WSLCommandLineOptions].
 */
internal fun AbstractWslDistribution.createWslCommandLine(vararg commands: String): GeneralCommandLine {
  val rawCommandLine = GeneralCommandLine(*commands)
  return patchCommandLine(rawCommandLine, null, WSLCommandLineOptions())
}
/**
 * Runs [commands] as a WSL command line, waits for the process with [waitProcess] (which throws when the exit
 * code is not 0) and returns its stdout decoded to String with trailing `\n` characters removed.
 */
fun AbstractWslDistribution.runCommand(vararg commands: String): String {
  val process = createWslCommandLine(*commands).createProcess()
  // stdout is collected in background while we are waiting for the process to exit
  val stdoutBytes = CompletableFuture.supplyAsync { process.inputStream.readAllBytes() }
  val commandText = commands.joinToString(" ")
  waitProcess(process, commandText)
  val output = stdoutBytes.get().decodeToString()
  return output.trimEnd('\n')
}
/**
 * Blocks until [process] exits. Does nothing more when the exit code is 0, otherwise throws an exception
 * that mentions [tool], the exit code and everything the process wrote to stderr.
 */
internal fun waitProcess(process: Process, tool: String) {
  // stderr is collected in background while we are waiting for the process to exit
  val errorOutput = CompletableFuture.supplyAsync { process.errorStream.readAllBytes() }
  val code = process.waitFor()
  if (code == 0) return
  throw Exception("Can't execute $tool: $code. ${errorOutput.get().decodeToString()}")
}

View File

@@ -92,9 +92,7 @@ class WslProxy(distro: AbstractWslDistribution, private val applicationPort: Int
private suspend fun readPortFromChannel(channel: ByteReadChannel): Int = readToBuffer(channel, 2).short.toUShort().toInt()
init {
val file = PathManager.findBinFileWithException("wslproxy").toString()
val wspPath = distro.getWslPath(file) ?: throw AssertionError("Can't access $file from Linux")
val wslCommandLine = distro.createWslCommandLine(wspPath)
// WslProxy must launch the proxy binary; "wslhash" only prints file hashes and never reports ports
val wslCommandLine = distro.getTool("wslproxy")
val process = Runtime.getRuntime().exec(wslCommandLine.commandLineString)
val log = Logger.getInstance(WslProxy::class.java)

View File

@@ -0,0 +1,11 @@
package com.intellij.execution.wsl
import com.intellij.execution.configurations.GeneralCommandLine
import com.intellij.openapi.application.PathManager
import java.nio.file.Path
/**
 * Same as [AbstractWslDistribution.getWslPath] for the string form of [path], but never returns `null`.
 * @throws Exception when the underlying call returns `null`
 */
internal fun AbstractWslDistribution.getWslPath(path: Path): String {
  val linuxPath = getWslPath(path.toString())
  if (linuxPath != null) return linuxPath
  throw Exception("Can't access from Linux: $path")
}
/**
 * Command line that runs the tool [toolName] (looked up with [PathManager.findBinFileWithException])
 * with [arguments]: the tool is addressed by its WSL path.
 */
internal fun AbstractWslDistribution.getTool(toolName: String, vararg arguments: String): GeneralCommandLine {
  val toolOnWindows = PathManager.findBinFileWithException(toolName)
  val toolOnLinux = getWslPath(toolOnWindows)
  return createWslCommandLine(toolOnLinux, *arguments)
}

View File

@@ -0,0 +1,38 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
import com.intellij.execution.wsl.AbstractWslDistribution
/**
 * Linux or Windows file storage: one side of the sync.
 * [MyFileType] is the file (path) type of this storage: [LinuxFileStorage] uses [LinuxFilePath] and [WindowsFileStorage] uses [WindowsFilePath].
 * [OtherSideFileType] is the file (path) type of the other side.
 * [FilePathRelativeToDir] are relative to [dir]
 *
 * @param dir root folder of this storage
 * @param distro WSL distribution this storage works with
 * @param onlyExtensions when not empty, only files with these extensions are taken into account
 */
internal abstract class FileStorage<MyFileType, OtherSideFileType>(
protected val dir: MyFileType,
protected val distro: AbstractWslDistribution,
protected val onlyExtensions: Array<String>
) {
/**
 * File names relative to [dir] as strings
 */
abstract fun getAllFilesInDir(): Collection<FilePathRelativeToDir>
/**
 * Hash records ([WslHashRecord]: file and hash of its content) for files in [dir]
 */
abstract fun getHashes(): List<WslHashRecord>
/**
 * is [dir] empty
 */
abstract fun isEmpty(): Boolean
/**
 * Removes [filesToRemove] (paths are relative to [dir])
 */
abstract fun removeFiles(filesToRemove: Collection<FilePathRelativeToDir>)
/**
 * Path for a temporary file on this side.
 * NOTE(review): [LinuxFileStorage] uses `mktemp -u`, which presumably only generates a name - confirm whether
 * implementations are expected to create the file
 */
abstract fun createTempFile(): MyFileType
/**
 * Removes [file]; presumably one obtained from [createTempFile] - confirm against callers
 */
abstract fun removeTempFile(file: MyFileType)
/**
 * tar [files] and copy to [destTar]
 */
abstract fun tarAndCopyTo(files: Collection<FilePathRelativeToDir>, destTar: OtherSideFileType)
/**
 * Unpacks [tarFile] to [dir]
 */
abstract fun unTar(tarFile: MyFileType)
}

View File

@@ -0,0 +1,139 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
import com.intellij.execution.configurations.GeneralCommandLine
import com.intellij.execution.wsl.*
import com.intellij.openapi.diagnostic.Logger
import com.intellij.util.TimeoutUtil
import com.intellij.util.io.delete
import java.io.InputStream
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.util.concurrent.TimeUnit
import kotlin.io.path.writeText
private val LOGGER = Logger.getInstance(LinuxFileStorage::class.java)
/**
 * [FileStorage] for a folder inside the WSL [distro]: everything is done by running Linux tools
 * (`wslhash`, `find`, `ls`, `rm`, `tar`, `cp`, `mktemp`) in the distro.
 * [dir] is normalized to end with exactly one `/`.
 */
internal class LinuxFileStorage(dir: LinuxFilePath, distro: AbstractWslDistribution, onlyExtensions: Array<String>)
  : FileStorage<LinuxFilePath, WindowsFilePath>(dir.trimEnd('/') + '/', distro, onlyExtensions) {

  /**
   * Runs `wslhash` for [dir] (with [onlyExtensions] as a filter) and parses its output
   */
  override fun getHashes(): List<WslHashRecord> {
    val result = ArrayList<WslHashRecord>(AVG_NUM_FILES)
    val time = TimeoutUtil.measureExecutionTime<Throwable> {
      val tool = distro.getTool("wslhash", dir, *onlyExtensions)
      val process = tool.createProcess()
      process.inputStream.use {
        result += getHashesInternal(it)
      }
      waitProcess(process, tool.commandLineString)
    }
    LOGGER.info("Linux files calculated in $time")
    return result
  }

  /**
   * Lists regular files in [dir] (on the same file system only) with `find`, see find(1)
   */
  override fun getAllFilesInDir(): Collection<FilePathRelativeToDir> {
    // "-type f" is repeated in each "-or" branch: "-or" binds weaker than implicit "and", so
    // "-type f -name *.a -or -name *.b" would also report directories (and other non-files) named "*.b"
    val filter = ArrayList<String>(onlyExtensions.size * 5)
    for (ext in onlyExtensions) {
      if (filter.isNotEmpty()) {
        filter += "-or"
      }
      filter += listOf("-type", "f", "-name", "*.$ext")
    }
    if (filter.isEmpty()) {
      filter += listOf("-type", "f")
    }
    return distro.runCommand("find", dir, "-xdev", *(filter.toTypedArray()))
      .splitToSequence('\n')
      .filterNot { it.isBlank() }
      .map { it.substring(dir.length) }
      .toList()
  }

  override fun createTempFile(): String = distro.runCommand("mktemp", "-u")

  /**
   * `true` if [dir] has no entries or doesn't exist at all
   * @throws Exception if the check doesn't finish in 5 seconds or fails with some message in stderr
   */
  override fun isEmpty(): Boolean {
    val options = WSLCommandLineOptions().apply {
      val escapedDir = GeneralCommandLine(dir).commandLineString
      addInitCommand("[ -e $escapedDir ]")
    }

    val process = distro.patchCommandLine(GeneralCommandLine("ls", "-A", dir), null, options).createProcess()
    // stdout must be drained while the process is running: listing of a big folder doesn't fit into the pipe buffer,
    // so `ls` would block until somebody reads it and we would report a timeout
    val hasOutput = java.util.concurrent.CompletableFuture.supplyAsync {
      process.inputStream.use { stdout -> stdout.readAllBytes().isNotEmpty() }
    }
    if (!process.waitFor(5, TimeUnit.SECONDS)) {
      process.destroyForcibly() // Do not leave the hung process behind
      throw Exception("Process didn't finished: WSL frozen?")
    }
    if (process.exitValue() == 0) {
      // Folder exists, lets check if empty
      return !hasOutput.get()
    }
    else {
      // Folder doesn't exist
      val error = process.errorStream.readAllBytes().decodeToString()
      if (error.isEmpty()) return true // Doesn't exist, but still empty
      throw Exception("Error checking folder: $error")
    }
  }

  /**
   * Removes [filesToRemove]: one `rm` call per file when there are less than 3 files,
   * otherwise all `rm` commands are written to a temporary script which is executed with `sh`
   */
  override fun removeFiles(filesToRemove: Collection<FilePathRelativeToDir>) {
    LOGGER.info("Removing files")
    if (filesToRemove.size < 3) {
      for (file in filesToRemove) {
        distro.runCommand("rm", "$dir/$file")
      }
      return
    }
    val script = createTmpWinFile(distro)
    try {
      script.first.writeText(filesToRemove.joinToString("\n") { GeneralCommandLine("rm", "$dir/$it").commandLineString })
      distro.runCommand("sh", script.second)
    }
    finally {
      script.first.delete() // Remove the script even if it failed
    }
  }

  override fun removeTempFile(file: LinuxFilePath) {
    distro.runCommand("rm", file)
  }

  /**
   * Tars [files] into a temporary file on the Linux side and copies it to [destTar].
   * Temporary files are removed even when one of the commands fails.
   */
  override fun tarAndCopyTo(files: Collection<FilePathRelativeToDir>, destTar: WindowsFilePath) {
    val linuxTarFile = createTempFile()
    try {
      val listFile = createTmpWinFile(distro)
      try {
        listFile.first.writeText(files.joinToString("\n"))

        LOGGER.info("Creating tar")
        // See tar(1)
        distro.runCommand("tar", "cf", linuxTarFile, "-m", "-h", "-O", "-C", dir, "-T", listFile.second)
      }
      finally {
        listFile.first.delete()
      }

      LOGGER.info("Copying tar")
      distro.runCommand("cp", linuxTarFile, distro.getWslPath(destTar))
    }
    finally {
      // -f: tar file may not exist if `tar` failed before creating it
      distro.runCommand("rm", "-f", linuxTarFile)
    }
  }

  /**
   * Creates [dir] if needed and unpacks [tarFile] into it
   */
  override fun unTar(tarFile: LinuxFilePath) {
    LOGGER.info("Unpacking")
    distro.runCommand("mkdir", "-p", dir)
    distro.runCommand("tar", "xf", tarFile, "-C", dir)
  }

  /**
   * Reads `wslhash` stdout and returns one [WslHashRecord] (file name and hash) per reported file.
   * NOTE(review): a file name that contains `:` can't be told from the separator - confirm such names are not expected.
   */
  private fun getHashesInternal(toolStdout: InputStream): List<WslHashRecord> {
    val result = ArrayList<WslHashRecord>(AVG_NUM_FILES)
    val fileOutput = ByteBuffer.wrap(toolStdout.readAllBytes()).order(ByteOrder.LITTLE_ENDIAN)
    // Linux side only works with UTF of 7-bit ASCII which is also supported by UTF and WSL doesn't support other charsets
    val charset = Charsets.UTF_8
    // See wslhash.c: format is the following: [file_path]:[hash].
    // Hash is little-endian 8 byte (64 bit) integer
    val separator = charset.encode(":").get()
    var fileStarted = 0
    val limit = fileOutput.limit()
    while (fileOutput.position() < limit) {
      val byte = fileOutput.get()
      if (byte == separator) {
        // Hash bytes are consumed here, so they are never scanned for the separator
        val hash = fileOutput.long
        val prevPos = fileOutput.position()
        // 9 = 8 bytes long + separator
        val message = charset.decode(fileOutput.limit(prevPos - 9).position(fileStarted))
        fileOutput.limit(limit).position(prevPos)
        val name = message.toString()
        result += WslHashRecord(name, hash)
        fileStarted = prevPos
      }
    }
    return result
  }
}

View File

@@ -0,0 +1,111 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
import com.intellij.execution.wsl.AbstractWslDistribution
import com.intellij.openapi.diagnostic.Logger
import com.intellij.util.TimeoutUtil
import com.intellij.util.io.*
import net.jpountz.xxhash.XXHashFactory
import java.nio.channels.FileChannel
import java.nio.file.*
import java.nio.file.attribute.BasicFileAttributes
import java.util.concurrent.CompletableFuture
import kotlin.io.path.extension
import kotlin.io.path.listDirectoryEntries
import kotlin.io.path.notExists
private val LOGGER = Logger.getInstance(WindowsFileStorage::class.java)
/**
 * File visitor that calls [process] for each regular file under [dir] whose extension is in [onlyExtensions]
 * (or for each regular file when [onlyExtensions] is empty).
 * The first argument of [process] is the lowercased path relative to [dir] with `/` as a separator.
 */
private class MyFileVisitor(private val onlyExtensions: Array<String>,
                            private val dir: Path,
                            private val process: (relativeToDir: FilePathRelativeToDir, file: Path, attrs: BasicFileAttributes) -> Unit) : SimpleFileVisitor<Path>() {
  override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult {
    // Anything that is not a regular file or has an extension we don't care about is silently skipped
    if (attrs.isRegularFile && (onlyExtensions.isEmpty() || file.extension in onlyExtensions)) {
      val relativeName = dir.relativize(file).joinToString("/").lowercase()
      process(relativeName, file, attrs)
    }
    return FileVisitResult.CONTINUE
  }
}
/**
 * [FileStorage] for the Windows side of the sync: files under [dir] are listed, hashed, removed,
 * packed and unpacked with `java.nio` directly from the IDE process.
 *
 * File names reported by this storage are relative to [dir], `/`-separated and lowercased (see [MyFileVisitor]).
 */
internal class WindowsFileStorage(dir: Path, distro: AbstractWslDistribution, onlyExtensions: Array<String>)
  : FileStorage<WindowsFilePath, LinuxFilePath>(dir, distro, onlyExtensions) {

  /**
   * @return relative names of all regular files under [dir] that pass the extension filter
   */
  override fun getAllFilesInDir(): Collection<FilePathRelativeToDir> {
    val result = ArrayList<FilePathRelativeToDir>(AVG_NUM_FILES)
    val visitor = MyFileVisitor(onlyExtensions, dir) { relativeToDir: FilePathRelativeToDir, _: Path, _: BasicFileAttributes ->
      result.add(relativeToDir)
    }
    Files.walkFileTree(dir, visitor)
    return result
  }

  /**
   * Calculates a 64-bit xxHash (seed 0) for every matching file under [dir].
   * Empty files get hash `0` to agree with the Linux-side tool (see wslhash.c).
   *
   * NOTE(review): each file is mapped into memory as a whole and `FileChannel.map` rejects regions larger than
   * `Integer.MAX_VALUE` bytes, so a file over 2 GiB will make this method throw -- confirm such files can't occur.
   */
  override fun getHashes(): List<WslHashRecord> {
    val result = ArrayList<WslHashRecord>(AVG_NUM_FILES)
    val hashTool = XXHashFactory.nativeInstance().hash64() // Native hash can access direct (mapped) buffer a little-bit faster
    val visitor = MyFileVisitor(onlyExtensions, dir) { relativeToDir: FilePathRelativeToDir, file: Path, attrs: BasicFileAttributes ->
      if (attrs.size() == 0L) {
        // Empty file's hash is 0, see wslhash.c
        result.add(WslHashRecord(relativeToDir, 0))
      }
      else {
        // Map file and read hash
        FileChannel.open(file, StandardOpenOption.READ).use {
          val buf = it.map(FileChannel.MapMode.READ_ONLY, 0, attrs.size())
          try {
            result.add(WslHashRecord(relativeToDir, hashTool.hash(buf, 0))) // Seed 0 is default, see wslhash.c
          }
          finally {
            ByteBufferUtil.cleanBuffer(buf) // Unmap file: can't overwrite mapped file
          }
        }
      }
    }
    val time = TimeoutUtil.measureExecutionTime<Throwable> {
      Files.walkFileTree(dir, visitor)
    }
    LOGGER.info("Windows files calculated in $time")
    return result
  }

  /**
   * @return `true` if [dir] doesn't exist or has no entries
   */
  override fun isEmpty(): Boolean = dir.notExists() || dir.listDirectoryEntries().isEmpty()

  /**
   * Deletes [filesToRemove] (names relative to [dir]). Stops at the first file that can't be deleted.
   */
  override fun removeFiles(filesToRemove: Collection<FilePathRelativeToDir>) {
    if (filesToRemove.isEmpty()) return
    for (file in filesToRemove) {
      val fileToDelete = dir.resolve(file)
      // Sanity check (active only when assertions are enabled): never delete anything outside of dir
      assert(dir.isAncestor(fileToDelete))
      Files.delete(fileToDelete)
    }
    LOGGER.info("${filesToRemove.size} files removed")
  }

  /**
   * Creates a temporary file on the Windows side, see [createTmpWinFile].
   */
  override fun createTempFile(): Path = createTmpWinFile(distro).first

  /**
   * Extracts [tarFile] into [dir].
   */
  override fun unTar(tarFile: WindowsFilePath) {
    LOGGER.info("Unpacking")
    Decompressor.Tar(tarFile).extract(dir)
  }

  /**
   * Packs [files] (names relative to [dir]) into an uncompressed tar and copies it to [destTar],
   * which is resolved against the UNC root of [distro].
   *
   * The intermediate tar file is created on the Windows side and is deleted once the archiver
   * has finished, whether or not archiving and copying succeeded.
   */
  override fun tarAndCopyTo(files: Collection<FilePathRelativeToDir>, destTar: LinuxFilePath) {
    LOGGER.info("Creating tar")
    val tarFile = createTempFile()
    // Archive in background while the UNC root is being resolved
    val tarFuture = CompletableFuture.supplyAsync {
      Compressor.Tar(tarFile.toFile(), Compressor.Tar.Compression.NONE).use { tar ->
        for (relativeFile in files) {
          tar.addFile(relativeFile, dir.resolve(relativeFile))
        }
      }
    }
    val uncRoot = checkNotNull(distro.getUNCRootVirtualFile(true)) { "Can't get UNC root for $distro" }
    val dest = uncRoot.toNioPath().resolve(destTar)
    try {
      // When get() returns or rethrows the archiver's failure, the archiver has closed the tar file, so it is safe to delete it
      tarFuture.get()
      LOGGER.info("Copying")
      Files.copy(tarFile, dest)
    }
    finally {
      tarFile.delete()
    }
  }

  /**
   * Deletes a temporary [file] previously created on the Windows side.
   */
  override fun removeTempFile(file: WindowsFilePath) {
    file.delete()
  }
}

View File

@@ -0,0 +1,4 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
/**
 * Hash of a single file.
 *
 * @property file file path exactly as it was reported by the side that calculated the hash
 * @property hash 64-bit hash of the file content
 * @property fileLowerCase [file] in lower case (unless set explicitly); lets callers match records ignoring case
 */
internal class WslHashRecord(
  val file: String,
  val hash: Long,
  val fileLowerCase: String = file.lowercase()
)

View File

@@ -0,0 +1,122 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
import com.intellij.execution.process.ProcessIOExecutorService
import com.intellij.execution.wsl.AbstractWslDistribution
import com.intellij.openapi.diagnostic.Logger
import java.nio.file.Path
import java.util.concurrent.CompletableFuture.runAsync
import java.util.concurrent.CompletableFuture.supplyAsync
import java.util.concurrent.Future
/**
 * Lower bound for the chunk size used when files are copied in parallel.
 * Every chunk is packed into its own .tar file, so there is no reason to spread a small number of files
 * over several chunks: fewer than [MIN_CHUNK_SIZE] files are always copied as one chunk.
 * (The last chunk of a bigger set may still be smaller, since it holds the remainder.)
 */
private const val MIN_CHUNK_SIZE = 1000
// File-level logger, registered under the WslSync category
private val LOGGER = Logger.getInstance(WslSync::class.java)
/**
 * One-way, rsync-like synchronization of two folders: makes [dest] reflect [source].
 *
 * Instances can't be created directly, use [syncWslFolders].
 */
class WslSync<SourceFile, DestFile> private constructor(private val source: FileStorage<SourceFile, DestFile>,
                                                        private val dest: FileStorage<DestFile, SourceFile>) {

  /**
   * Entry point: copies everything if [dest] is empty, otherwise compares hashes and syncs differences only.
   */
  private fun sync() {
    if (dest.isEmpty()) { //Shortcut: no need to sync anything, just copy everything
      copyFilesInParallel(source.getAllFilesInDir())
    }
    else {
      syncFoldersInternal()
    }
  }

  /**
   * Calculates hashes on both sides (in parallel), then copies files that are new or changed on [source]
   * and removes files from [dest] that do not exist on [source].
   */
  private fun syncFoldersInternal() {
    val sourceHashesFuture = supplyAsync({
                                           source.getHashes()
                                         }, ProcessIOExecutorService.INSTANCE)
    val destHashesFuture = supplyAsync({
                                         dest.getHashes()
                                       }, ProcessIOExecutorService.INSTANCE)
    // Lowercase is to ignore case when comparing files since Win is case-insensitive
    val sourceHashes: MutableMap<String, WslHashRecord> =
      sourceHashesFuture.get().associateByTo(LinkedHashMap<String, WslHashRecord>()) { it.fileLowerCase }
    val destHashes: List<WslHashRecord> = destHashesFuture.get()

    val destFilesToRemove = ArrayList<String>(AVG_NUM_FILES)
    for (destRecord in destHashes) {
      val sourceRecord = sourceHashes[destRecord.fileLowerCase]
      when {
        // No such file on Source, remove it from Dest.
        // Lin is case-sensitive so we must use real file name, not the lowercased one we used for comparison
        sourceRecord == null -> destFilesToRemove.add(destRecord.file)
        // Dest file matches Source file.
        // Remove this record, so at the end there will be a list of files to copy from SRC to DST
        sourceRecord.hash == destRecord.hash -> sourceHashes.remove(destRecord.fileLowerCase)
        // Otherwise the content differs: record stays in the map and the file will be copied over
      }
    }

    copyFilesInParallel(sourceHashes.values.map { it.file })
    dest.removeFiles(destFilesToRemove)
  }

  /**
   * Copies [filesToCopy] from src to dst
   * It may split files to the several chunks to copy them in parallel, see [MIN_CHUNK_SIZE]
   */
  private fun copyFilesInParallel(filesToCopy: Collection<String>) {
    if (filesToCopy.isEmpty()) return
    LOGGER.info("Will copy ${filesToCopy.size} files")

    // Ceiling division: with floor division the remainder would spill into one more (tiny) chunk
    val chunkSize = ((filesToCopy.size + MAX_PARALLEL_CHUNKS - 1) / MAX_PARALLEL_CHUNKS).coerceAtLeast(MIN_CHUNK_SIZE)
    if (filesToCopy.size <= chunkSize) {
      // Everything fits into one chunk: no need to go parallel
      copyFilesToOtherSide(filesToCopy)
    }
    else {
      val chunks = filesToCopy.chunked(chunkSize)
      LOGGER.info("Split to ${chunks.size} chunks")
      val futures: List<Future<*>> = chunks.map { chunk ->
        runAsync({
                   copyFilesToOtherSide(chunk)
                 }, ProcessIOExecutorService.INSTANCE)
      }
      futures.forEach { it.get() }
    }
    LOGGER.info("Copied")
  }

  /**
   * Packs [files] on [source], transfers the tar to a temp file on [dest] and unpacks it there.
   * The temp file on [dest] is removed even if packing or unpacking fails.
   */
  private fun copyFilesToOtherSide(files: Collection<String>) {
    val destTar = dest.createTempFile()
    try {
      source.tarAndCopyTo(files, destTar)
      dest.unTar(destTar)
    }
    catch (e: Throwable) {
      // Best-effort cleanup that must not hide the original failure
      try {
        dest.removeTempFile(destTar)
      }
      catch (cleanupError: Throwable) {
        e.addSuppressed(cleanupError)
      }
      throw e
    }
    dest.removeTempFile(destTar)
  }

  companion object {
    // 4 suggested by V.Lagunov and https://pkolaczk.github.io/disk-parallelism/
    private const val MAX_PARALLEL_CHUNKS = 4

    /**
     * Makes [windowsDir] reflect [linuxDir] (or vice versa depending on [linToWinCopy]) on [distribution] much like rsync.
     * Redundant files deleted, new/changed files copied.
     * Set [onlyExtensions] if you only care about certain extensions.
     * Direction depends on [linToWinCopy]
     */
    fun syncWslFolders(linuxDir: String,
                       windowsDir: Path,
                       distribution: AbstractWslDistribution,
                       linToWinCopy: Boolean = true,
                       onlyExtensions: Array<String> = emptyArray()) {
      val win = WindowsFileStorage(windowsDir, distribution, onlyExtensions)
      val lin = LinuxFileStorage(linuxDir, distribution, onlyExtensions)
      if (linToWinCopy) {
        WslSync(lin, win).sync()
      }
      else {
        WslSync(win, lin).sync()
      }
    }
  }
}

View File

@@ -0,0 +1,18 @@
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.execution.wsl.sync
import com.intellij.execution.wsl.AbstractWslDistribution
import com.intellij.execution.wsl.getWslPath
import java.nio.file.Files
import java.nio.file.Path
// Path of a file relative to the directory being synced
typealias FilePathRelativeToDir = String
// File on the Windows side is addressed with a java.nio Path
typealias WindowsFilePath = Path
// File on the Linux side is addressed with a plain string
typealias LinuxFilePath = String
// Initial capacity for lists of files and hashes, so they don't have to grow too often
internal const val AVG_NUM_FILES = 10_000
/**
 * Creates an empty temporary file in the default temp directory of the Windows side.
 *
 * @param distro distribution used to translate the Windows path with `getWslPath`
 * @return the created file paired with the result of `distro.getWslPath` for it
 */
internal fun createTmpWinFile(distro: AbstractWslDistribution): Pair<WindowsFilePath, LinuxFilePath> {
  // Files.createTempFile appends the suffix verbatim, so the dot must be explicit to get a ".tmp" extension
  val file = Files.createTempFile("intellij", ".tmp")
  return Pair(file, distro.getWslPath(file))
}