[properties] add UTF-8 to ISO-8859-1 fallback for properties files IDEA-360433

GitOrigin-RevId: f93174e21b407022c5a9f356c220aa3ac443a0b0
This commit is contained in:
Aleksey Dobrynin
2024-10-10 17:08:53 +02:00
committed by intellij-monorepo-bot
parent a082ee2095
commit 338475c607
4 changed files with 106 additions and 43 deletions

View File

@@ -1,12 +1,15 @@
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.compiler;
import com.intellij.openapi.application.WriteAction;
import com.intellij.openapi.util.Ref;
import com.intellij.openapi.util.registry.Registry;
import com.intellij.openapi.util.registry.RegistryValue;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.openapi.vfs.encoding.EncodingProjectManager;
import com.intellij.testFramework.JavaPsiTestCase;
import com.intellij.testFramework.PsiTestUtil;
import com.intellij.util.ArrayUtil;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
@@ -51,6 +54,40 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
}
/**
 * Verifies which charset a {@code .properties} file reports after its charset is reset to
 * {@code null}, for three successive versions of the file content: UTF-8 is expected while the
 * bytes are valid UTF-8, and ISO-8859-1 once the content contains a byte sequence that is not.
 */
@SuppressWarnings({"TextBlockMigration", "NonAsciiCharacters"})
public void testPropertiesAutoEncoding() throws IOException {
  final VirtualFile file = createFile("test.properties");

  // Step 1: ASCII-only content. The bytes are the same in ISO-8859-1 and UTF-8.
  final byte[] asciiOnly = ("one=1\n" +
                            "two=2\n").getBytes(StandardCharsets.ISO_8859_1);
  WriteAction.run(() -> file.setBinaryContent(asciiOnly));
  file.setCharset(null); // presumably drops the cached charset so it is determined again -- confirm
  assertEquals(StandardCharsets.UTF_8, file.getCharset());

  // Step 2: append more ASCII, this time produced via UTF-8; the content is still valid UTF-8.
  final byte[] withUtf8Tail = ArrayUtil.mergeArrays(asciiOnly, ("three=3\n" +
                                                                "four=4\n").getBytes(StandardCharsets.UTF_8));
  WriteAction.run(() -> file.setBinaryContent(withUtf8Tail));
  file.setCharset(null);
  assertEquals(StandardCharsets.UTF_8, file.getCharset());

  // Step 3: append a line with 'ü' encoded in ISO-8859-1 (single byte 0xFC), which is not a
  // valid UTF-8 sequence, so the ISO-8859-1 fallback is expected.
  final byte[] withLatin1Tail = ArrayUtil.mergeArrays(withUtf8Tail, ("five=fünf\n" +
                                                                     "six=sechs\n").getBytes(StandardCharsets.ISO_8859_1));
  WriteAction.run(() -> file.setBinaryContent(withLatin1Tail));
  file.setCharset(null);
  assertEquals(StandardCharsets.ISO_8859_1, file.getCharset());
}
public void testPropertiesEncodingFeatureFlagTest() {
RegistryValue registryValue = Registry.get("properties.file.encoding.legacy.support");
try {
@@ -60,7 +97,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
EncodingProjectManager.getInstance(myProject).setEncoding(file, WINDOWS_1251);
assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
} finally {
}
finally {
registryValue.resetToDefault();
}
}
@@ -101,7 +139,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
EncodingProjectManager.getInstance(myProject).setEncoding(fileB, WINDOWS_1252);
assertSameElements(getService().getAllModuleEncodings(myModule), projectDefaultPlus(WINDOWS_1251));
} finally {
}
finally {
registryValue.resetToDefault();
}
}

View File

@@ -1896,6 +1896,7 @@ f:com.intellij.openapi.fileTypes.CharsetUtil
- <init>():V
- s:extractCharsetFromFileContent(com.intellij.openapi.project.Project,com.intellij.openapi.vfs.VirtualFile,com.intellij.openapi.fileTypes.FileType,java.lang.CharSequence):java.nio.charset.Charset
- s:findUnmappableCharacters(java.lang.CharSequence,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
- s:findUnmappableCharacters(java.nio.ByteBuffer,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
com.intellij.openapi.fileTypes.DirectoryFileType
- com.intellij.openapi.fileTypes.FileType
com.intellij.openapi.fileTypes.FileType

View File

@@ -2,7 +2,6 @@
package com.intellij.openapi.fileTypes;
import com.intellij.openapi.project.Project;
import com.intellij.openapi.util.Ref;
import com.intellij.openapi.util.TextRange;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.openapi.vfs.VirtualFile;
@@ -48,40 +47,22 @@ public final class CharsetUtil {
* @return a {@code TextRange} representing the range of unmappable characters, or {@code null} if all characters can be mapped
*/
public static @Nullable TextRange findUnmappableCharacters(@Nullable CharSequence text, @NotNull Charset charset) {
if(text == null || text.length() == 0) return null;
return findUnmappableRange(CharBuffer.wrap(text), Ref.create(), CharBuffer.allocate(text.length()), charset);
}
if (text == null || text.length() == 0) return null;
/**
* Identifies the range of characters that either fail to encode or decode properly with the specified charset.
*
* @param inputBuffer the input character buffer to be checked
* @param encodedBufferRef a reference to the output byte buffer for storing encoded bytes
* @param decodedBuffer a character buffer to hold the decoded characters
* @param charset the charset used for encoding and decoding
* @return a {@code TextRange} object representing the range of unmappable characters, or {@code null} if all characters are mappable
*/
private static @Nullable TextRange findUnmappableRange(@NotNull CharBuffer inputBuffer,
@NotNull Ref<ByteBuffer> encodedBufferRef,
@NotNull CharBuffer decodedBuffer,
@NotNull Charset charset) {
CharBuffer inputBuffer = text instanceof CharBuffer ? (CharBuffer)text : CharBuffer.wrap(text);
CharsetEncoder encoder = charset.newEncoder()
.onUnmappableCharacter(CodingErrorAction.REPORT)
.onMalformedInput(CodingErrorAction.REPORT);
int remainingChars = inputBuffer.limit();
ByteBuffer encodedBuffer = encodedBufferRef.get();
if (encodedBuffer == null) {
encodedBufferRef.set(encodedBuffer = ByteBuffer.allocate((int)(encoder.averageBytesPerChar() * remainingChars)));
}
encodedBuffer.rewind();
encodedBuffer.limit(encodedBuffer.capacity());
int remainingChars = inputBuffer.remaining();
ByteBuffer encodedBuffer = ByteBuffer.allocate((int)(encoder.maxBytesPerChar() * remainingChars));
inputBuffer.rewind();
inputBuffer.position(0);
encodedBuffer.clear();
CoderResult encodeResult;
while (true) {
encodeResult = inputBuffer.hasRemaining() ? encoder.encode(inputBuffer, encodedBuffer, true) : CoderResult.UNDERFLOW;
encodeResult = encoder.encode(inputBuffer, encodedBuffer, true); // Encode the characters
if (encodeResult.isUnderflow()) {
encodeResult = encoder.flush(encodedBuffer);
}
@@ -89,27 +70,22 @@ public final class CharsetUtil {
break;
}
ByteBuffer tempBuffer = ByteBuffer.allocate(3 * encodedBuffer.capacity() / 2 + 1);
ByteBuffer tempBuffer = ByteBuffer.allocate(2 * encodedBuffer.capacity());
encodedBuffer.flip();
tempBuffer.put(encodedBuffer);
encodedBufferRef.set(encodedBuffer = tempBuffer);
encodedBuffer = tempBuffer;
}
if (encodeResult.isError()) {
return TextRange.from(inputBuffer.position(), encodeResult.length());
}
int encodedLength = encodedBuffer.position();
CharsetDecoder decoder = charset.newDecoder()
.onUnmappableCharacter(CodingErrorAction.REPORT)
.onMalformedInput(CodingErrorAction.REPORT);
encodedBuffer.rewind();
encodedBuffer.limit(encodedLength);
decodedBuffer.rewind();
encodedBuffer.flip();
CharBuffer decodedBuffer = CharBuffer.allocate(encodedBuffer.remaining());
TextRange range = findUnmappableRange(encodedBuffer, charset, decodedBuffer);
CoderResult decodeResult = decoder.decode(encodedBuffer, decodedBuffer, true);
if (decodeResult.isError()) {
return TextRange.from(decodedBuffer.position(), decodeResult.length());
if (range != null) {
return range;
}
if (decodedBuffer.position() != remainingChars) {
@@ -117,9 +93,48 @@ public final class CharsetUtil {
}
inputBuffer.rewind();
inputBuffer.position(0);
decodedBuffer.rewind();
int commonPrefixLength = StringUtil.commonPrefixLength(inputBuffer, decodedBuffer);
return commonPrefixLength == remainingChars ? null : TextRange.from(commonPrefixLength, 1);
}
/**
 * Decodes the remaining bytes of the given buffer with the specified charset and reports the
 * first place where decoding fails.
 * <p>
 * The buffer is consumed by the decoder, so its position is advanced by this call. The start of
 * the returned range is the buffer position at which the decoder reported the error (a byte
 * position, not a character index).
 *
 * @param byteBuffer the bytes to validate, from the current position up to the limit
 * @param charset    the charset the bytes are expected to be encoded in
 * @return a {@code TextRange} describing the offending input, or {@code null} if decoding reported no error
 */
public static @Nullable TextRange findUnmappableCharacters(@NotNull ByteBuffer byteBuffer, @NotNull Charset charset) {
  // Scratch output sized at one char per remaining byte -- assumes the charset never
  // produces more chars than it consumes bytes (TODO confirm for exotic charsets).
  CharBuffer scratch = CharBuffer.allocate(byteBuffer.remaining());
  return findUnmappableRange(byteBuffer, charset, scratch);
}
/**
 * Runs a strict decode of {@code byteBuffer} into {@code decodedBuffer} and reports the first
 * decoding error, if any.
 * <p>
 * A fresh decoder is configured to report (rather than replace or ignore) both malformed input
 * and unmappable characters. The input is decoded as a single, final chunk and the decoder is
 * then flushed. Only error results are inspected: a non-error result such as an overflow of
 * {@code decodedBuffer} is not treated as a failure, hence the capacity requirement below.
 *
 * @param byteBuffer    the bytes to decode; its position is advanced by the decoder
 * @param charset       the charset used for decoding
 * @param decodedBuffer receives the decoded characters; must be large enough to hold all of them
 * @return a {@code TextRange} starting at the position of {@code byteBuffer} when the error was
 * reported, with the length given by the coder result, or {@code null} if neither decoding nor
 * flushing reported an error
 */
private static @Nullable TextRange findUnmappableRange(@NotNull ByteBuffer byteBuffer,
                                                       @NotNull Charset charset,
                                                       @NotNull CharBuffer decodedBuffer) {
  CharsetDecoder strictDecoder = charset.newDecoder();
  strictDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
  strictDecoder.onMalformedInput(CodingErrorAction.REPORT);

  // endOfInput = true: the whole input is presented in one call.
  CoderResult outcome = strictDecoder.decode(byteBuffer, decodedBuffer, true);
  if (!outcome.isError()) {
    // Decoding itself was clean; let the decoder emit any pending state and re-check.
    outcome = strictDecoder.flush(decodedBuffer);
  }

  return outcome.isError() ? TextRange.from(byteBuffer.position(), outcome.length()) : null;
}
}

View File

@@ -2,6 +2,7 @@
package com.intellij.lang.properties;
import com.intellij.lang.properties.charset.Native2AsciiCharset;
import com.intellij.openapi.fileTypes.CharsetUtil;
import com.intellij.openapi.fileTypes.LanguageFileType;
import com.intellij.openapi.util.registry.Registry;
import com.intellij.openapi.vfs.VirtualFile;
@@ -11,6 +12,7 @@ import com.intellij.ui.PlatformIcons;
import org.jetbrains.annotations.NotNull;
import javax.swing.*;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
@@ -48,6 +50,11 @@ public final class PropertiesFileType extends LanguageFileType {
Charset charset = EncodingRegistry.getInstance().getDefaultCharsetForPropertiesFiles(file);
if (charset == null) {
charset = getDefaultCharset();
if (content.length > 0 &&
StandardCharsets.UTF_8.equals(charset) &&
CharsetUtil.findUnmappableCharacters(ByteBuffer.wrap(content), StandardCharsets.UTF_8) != null) {
charset = StandardCharsets.ISO_8859_1;
}
}
if (EncodingRegistry.getInstance().isNative2Ascii(file)) {
charset = Native2AsciiCharset.wrap(charset);
@@ -56,9 +63,10 @@ public final class PropertiesFileType extends LanguageFileType {
}
/**
 * Returns the charset used for properties files when no charset has been configured for them.
 * <p>
 * ISO-8859-1 is returned while the registry flag
 * {@code properties.file.encoding.legacy.support} is enabled; otherwise UTF-8 is returned.
 * The {@code false} passed to {@code Registry.is} is presumably the value used when the key
 * is not registered -- confirm against the {@code Registry} API.
 *
 * @return {@code ISO_8859_1} in legacy mode, {@code UTF_8} otherwise
 */
public @NotNull Charset getDefaultCharset() {
if(Registry.is("properties.file.encoding.legacy.support", false)) {
if (Registry.is("properties.file.encoding.legacy.support", false)) {
return StandardCharsets.ISO_8859_1;
} else {
}
else {
return StandardCharsets.UTF_8;
}
}