mirror of
https://gitflic.ru/project/openide/openide.git
synced 2025-12-15 11:53:49 +07:00
[properties] add UTF-8 to ISO-8859-1 fallback for properties files IDEA-360433
GitOrigin-RevId: f93174e21b407022c5a9f356c220aa3ac443a0b0
This commit is contained in:
committed by
intellij-monorepo-bot
parent
a082ee2095
commit
338475c607
@@ -1,12 +1,15 @@
|
||||
// Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
|
||||
package com.intellij.compiler;
|
||||
|
||||
import com.intellij.openapi.application.WriteAction;
|
||||
import com.intellij.openapi.util.Ref;
|
||||
import com.intellij.openapi.util.registry.Registry;
|
||||
import com.intellij.openapi.util.registry.RegistryValue;
|
||||
import com.intellij.openapi.vfs.VirtualFile;
|
||||
import com.intellij.openapi.vfs.encoding.EncodingProjectManager;
|
||||
import com.intellij.testFramework.JavaPsiTestCase;
|
||||
import com.intellij.testFramework.PsiTestUtil;
|
||||
import com.intellij.util.ArrayUtil;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -51,6 +54,40 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
|
||||
assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
|
||||
}
|
||||
|
||||
@SuppressWarnings({"TextBlockMigration", "NonAsciiCharacters"})
|
||||
public void testPropertiesAutoEncoding() throws IOException {
|
||||
// Phase 1: the file holds only ASCII key/value pairs; the detected charset is expected to be UTF-8.
|
||||
final Ref<byte[]> content = Ref.create();
|
||||
final VirtualFile file = createFile("test.properties");
|
||||
WriteAction.run(() -> {
|
||||
content.set(("one=1\n" +
|
||||
"two=2\n").getBytes(StandardCharsets.ISO_8859_1));
|
||||
file.setBinaryContent(content.get());
|
||||
});
|
||||
file.setCharset(null); // NOTE(review): presumably drops the stored charset so getCharset() re-detects it from the new bytes — confirm
|
||||
assertEquals(StandardCharsets.UTF_8, file.getCharset());
|
||||
|
||||
// Phase 2: append values containing keycap emoji encoded as UTF-8; the file is still expected to be reported as UTF-8.
|
||||
WriteAction.run(() -> {
|
||||
content.set(ArrayUtil.mergeArrays(content.get(), ("three=3️⃣\n" +
|
||||
"four=4️⃣\n").getBytes(StandardCharsets.UTF_8)));
|
||||
file.setBinaryContent(content.get());
|
||||
});
|
||||
file.setCharset(null);
|
||||
|
||||
assertEquals(StandardCharsets.UTF_8, file.getCharset());
|
||||
|
||||
// Phase 3: append a value whose 'ü' is encoded as ISO-8859-1 (a lone 0xFC byte, which is not valid UTF-8); the charset is expected to fall back to ISO-8859-1.
|
||||
WriteAction.run(() -> {
|
||||
content.set(ArrayUtil.mergeArrays(content.get(), ("five=fünf\n" +
|
||||
"six=sechs\n").getBytes(StandardCharsets.ISO_8859_1)));

||||
file.setBinaryContent(content.get());
|
||||
});
|
||||
file.setCharset(null);
|
||||
assertEquals(StandardCharsets.ISO_8859_1, file.getCharset());
|
||||
}
|
||||
|
||||
|
||||
public void testPropertiesEncodingFeatureFlagTest() {
|
||||
RegistryValue registryValue = Registry.get("properties.file.encoding.legacy.support");
|
||||
try {
|
||||
@@ -60,7 +97,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
|
||||
EncodingProjectManager.getInstance(myProject).setEncoding(file, WINDOWS_1251);
|
||||
|
||||
assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
|
||||
} finally {
|
||||
}
|
||||
finally {
|
||||
registryValue.resetToDefault();
|
||||
}
|
||||
}
|
||||
@@ -101,7 +139,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
|
||||
EncodingProjectManager.getInstance(myProject).setEncoding(fileB, WINDOWS_1252);
|
||||
|
||||
assertSameElements(getService().getAllModuleEncodings(myModule), projectDefaultPlus(WINDOWS_1251));
|
||||
} finally {
|
||||
}
|
||||
finally {
|
||||
registryValue.resetToDefault();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1896,6 +1896,7 @@ f:com.intellij.openapi.fileTypes.CharsetUtil
|
||||
- <init>():V
|
||||
- s:extractCharsetFromFileContent(com.intellij.openapi.project.Project,com.intellij.openapi.vfs.VirtualFile,com.intellij.openapi.fileTypes.FileType,java.lang.CharSequence):java.nio.charset.Charset
|
||||
- s:findUnmappableCharacters(java.lang.CharSequence,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
|
||||
- s:findUnmappableCharacters(java.nio.ByteBuffer,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
|
||||
com.intellij.openapi.fileTypes.DirectoryFileType
|
||||
- com.intellij.openapi.fileTypes.FileType
|
||||
com.intellij.openapi.fileTypes.FileType
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
package com.intellij.openapi.fileTypes;
|
||||
|
||||
import com.intellij.openapi.project.Project;
|
||||
import com.intellij.openapi.util.Ref;
|
||||
import com.intellij.openapi.util.TextRange;
|
||||
import com.intellij.openapi.util.text.StringUtil;
|
||||
import com.intellij.openapi.vfs.VirtualFile;
|
||||
@@ -48,40 +47,22 @@ public final class CharsetUtil {
|
||||
* @return a {@code TextRange} representing the range of unmappable characters, or {@code null} if all characters can be mapped
|
||||
*/
|
||||
public static @Nullable TextRange findUnmappableCharacters(@Nullable CharSequence text, @NotNull Charset charset) {
|
||||
if(text == null || text.length() == 0) return null;
|
||||
return findUnmappableRange(CharBuffer.wrap(text), Ref.create(), CharBuffer.allocate(text.length()), charset);
|
||||
}
|
||||
if (text == null || text.length() == 0) return null;
|
||||
|
||||
/**
|
||||
* Identifies the range of characters that either fail to encode or decode properly with the specified charset.
|
||||
*
|
||||
* @param inputBuffer the input character buffer to be checked
|
||||
* @param encodedBufferRef a reference to the output byte buffer for storing encoded bytes
|
||||
* @param decodedBuffer a character buffer to hold the decoded characters
|
||||
* @param charset the charset used for encoding and decoding
|
||||
* @return a {@code TextRange} object representing the range of unmappable characters, or {@code null} if all characters are mappable
|
||||
*/
|
||||
private static @Nullable TextRange findUnmappableRange(@NotNull CharBuffer inputBuffer,
|
||||
@NotNull Ref<ByteBuffer> encodedBufferRef,
|
||||
@NotNull CharBuffer decodedBuffer,
|
||||
@NotNull Charset charset) {
|
||||
CharBuffer inputBuffer = text instanceof CharBuffer ? (CharBuffer)text : CharBuffer.wrap(text);
|
||||
CharsetEncoder encoder = charset.newEncoder()
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT)
|
||||
.onMalformedInput(CodingErrorAction.REPORT);
|
||||
int remainingChars = inputBuffer.limit();
|
||||
|
||||
ByteBuffer encodedBuffer = encodedBufferRef.get();
|
||||
if (encodedBuffer == null) {
|
||||
encodedBufferRef.set(encodedBuffer = ByteBuffer.allocate((int)(encoder.averageBytesPerChar() * remainingChars)));
|
||||
}
|
||||
encodedBuffer.rewind();
|
||||
encodedBuffer.limit(encodedBuffer.capacity());
|
||||
int remainingChars = inputBuffer.remaining();
|
||||
ByteBuffer encodedBuffer = ByteBuffer.allocate((int)(encoder.maxBytesPerChar() * remainingChars));
|
||||
inputBuffer.rewind();
|
||||
inputBuffer.position(0);
|
||||
encodedBuffer.clear();
|
||||
|
||||
CoderResult encodeResult;
|
||||
|
||||
while (true) {
|
||||
encodeResult = inputBuffer.hasRemaining() ? encoder.encode(inputBuffer, encodedBuffer, true) : CoderResult.UNDERFLOW;
|
||||
encodeResult = encoder.encode(inputBuffer, encodedBuffer, true); // Encode the characters
|
||||
if (encodeResult.isUnderflow()) {
|
||||
encodeResult = encoder.flush(encodedBuffer);
|
||||
}
|
||||
@@ -89,27 +70,22 @@ public final class CharsetUtil {
|
||||
break;
|
||||
}
|
||||
|
||||
ByteBuffer tempBuffer = ByteBuffer.allocate(3 * encodedBuffer.capacity() / 2 + 1);
|
||||
ByteBuffer tempBuffer = ByteBuffer.allocate(2 * encodedBuffer.capacity());
|
||||
encodedBuffer.flip();
|
||||
tempBuffer.put(encodedBuffer);
|
||||
encodedBufferRef.set(encodedBuffer = tempBuffer);
|
||||
encodedBuffer = tempBuffer;
|
||||
}
|
||||
|
||||
if (encodeResult.isError()) {
|
||||
return TextRange.from(inputBuffer.position(), encodeResult.length());
|
||||
}
|
||||
|
||||
int encodedLength = encodedBuffer.position();
|
||||
CharsetDecoder decoder = charset.newDecoder()
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT)
|
||||
.onMalformedInput(CodingErrorAction.REPORT);
|
||||
encodedBuffer.rewind();
|
||||
encodedBuffer.limit(encodedLength);
|
||||
decodedBuffer.rewind();
|
||||
encodedBuffer.flip();
|
||||
CharBuffer decodedBuffer = CharBuffer.allocate(encodedBuffer.remaining());
|
||||
TextRange range = findUnmappableRange(encodedBuffer, charset, decodedBuffer);
|
||||
|
||||
CoderResult decodeResult = decoder.decode(encodedBuffer, decodedBuffer, true);
|
||||
if (decodeResult.isError()) {
|
||||
return TextRange.from(decodedBuffer.position(), decodeResult.length());
|
||||
if (range != null) {
|
||||
return range;
|
||||
}
|
||||
|
||||
if (decodedBuffer.position() != remainingChars) {
|
||||
@@ -117,9 +93,48 @@ public final class CharsetUtil {
|
||||
}
|
||||
|
||||
inputBuffer.rewind();
|
||||
inputBuffer.position(0);
|
||||
decodedBuffer.rewind();
|
||||
int commonPrefixLength = StringUtil.commonPrefixLength(inputBuffer, decodedBuffer);
|
||||
|
||||
return commonPrefixLength == remainingChars ? null : TextRange.from(commonPrefixLength, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the bytes remaining in the given buffer can be decoded with the specified charset.
|
||||
* The buffer is handed to a decoder, so its position changes as a side effect; scratch output is allocated at one char per remaining byte.
|
||||
* @param byteBuffer the byte buffer to be checked; only the bytes between its position and limit are examined
|
||||
* @param charset the charset to be used for decoding
|
||||
* @return a {@code TextRange} starting at the buffer position (a byte offset, not a character index) where decoding reported an error, or {@code null} if no error was reported
|
||||
*/
|
||||
public static @Nullable TextRange findUnmappableCharacters(@NotNull ByteBuffer byteBuffer, @NotNull Charset charset) {
|
||||
return findUnmappableRange(byteBuffer, charset, CharBuffer.allocate(byteBuffer.remaining()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes the byte buffer with the specified charset, reporting (rather than replacing) malformed or unmappable input.
|
||||
* Only error results are inspected: a non-error result such as an overflow of {@code decodedBuffer} is not treated as a failure and leads to {@code null}.
|
||||
* @param byteBuffer the input byte buffer to be checked; its position is advanced by the decoder
|
||||
* @param charset the charset used for decoding
|
||||
* @param decodedBuffer the buffer to store the result of decoding; must have enough capacity to hold the decoded characters
|
||||
* @return a {@code TextRange} whose start is the byte offset at which decoding stopped and whose length is the length reported by the failing {@code CoderResult}, or {@code null} if neither decode nor flush reported an error
|
||||
*/
|
||||
private static @Nullable TextRange findUnmappableRange(@NotNull ByteBuffer byteBuffer,
|
||||
@NotNull Charset charset,
|
||||
@NotNull CharBuffer decodedBuffer) {
|
||||
CharsetDecoder decoder = charset.newDecoder()
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT)
|
||||
.onMalformedInput(CodingErrorAction.REPORT);
|
||||
|
||||
CoderResult result = decoder.decode(byteBuffer, decodedBuffer, true); // endOfInput = true: no further bytes will be supplied after this buffer
|
||||
if (result.isError()) {
|
||||
return TextRange.from(byteBuffer.position(), result.length()); // offsets are in bytes of the input buffer
|
||||
}
|
||||
|
||||
result = decoder.flush(decodedBuffer); // let the decoder finish and write out any state it still holds
|
||||
if (result.isError()) {
|
||||
return TextRange.from(byteBuffer.position(), result.length());
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
package com.intellij.lang.properties;
|
||||
|
||||
import com.intellij.lang.properties.charset.Native2AsciiCharset;
|
||||
import com.intellij.openapi.fileTypes.CharsetUtil;
|
||||
import com.intellij.openapi.fileTypes.LanguageFileType;
|
||||
import com.intellij.openapi.util.registry.Registry;
|
||||
import com.intellij.openapi.vfs.VirtualFile;
|
||||
@@ -11,6 +12,7 @@ import com.intellij.ui.PlatformIcons;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import javax.swing.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
@@ -48,6 +50,11 @@ public final class PropertiesFileType extends LanguageFileType {
|
||||
Charset charset = EncodingRegistry.getInstance().getDefaultCharsetForPropertiesFiles(file);
|
||||
if (charset == null) {
|
||||
charset = getDefaultCharset();
|
||||
if (content.length > 0 &&
|
||||
StandardCharsets.UTF_8.equals(charset) &&
|
||||
CharsetUtil.findUnmappableCharacters(ByteBuffer.wrap(content), StandardCharsets.UTF_8) != null) {
|
||||
charset = StandardCharsets.ISO_8859_1;
|
||||
}
|
||||
}
|
||||
if (EncodingRegistry.getInstance().isNative2Ascii(file)) {
|
||||
charset = Native2AsciiCharset.wrap(charset);
|
||||
@@ -56,9 +63,10 @@ public final class PropertiesFileType extends LanguageFileType {
|
||||
}
|
||||
|
||||
public @NotNull Charset getDefaultCharset() {
|
||||
if(Registry.is("properties.file.encoding.legacy.support", false)) {
|
||||
if (Registry.is("properties.file.encoding.legacy.support", false)) {
|
||||
return StandardCharsets.ISO_8859_1;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
return StandardCharsets.UTF_8;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user