[properties] add UTF-8 to ISO-8859-1 fallback for properties files IDEA-360433

GitOrigin-RevId: f93174e21b407022c5a9f356c220aa3ac443a0b0
2025-12-15 11:53:49 +07:00 · 2024-10-10 17:08:53 +02:00
parent a082ee2095
commit 338475c607
4 changed files with 106 additions and 43 deletions
--- a/java/compiler/tests/com/intellij/compiler/CompilerEncodingServiceTest.java
+++ b/java/compiler/tests/com/intellij/compiler/CompilerEncodingServiceTest.java
@@ -1,12 +1,15 @@
 // Copyright 2000-2024 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
 package com.intellij.compiler;

+import com.intellij.openapi.application.WriteAction;
+import com.intellij.openapi.util.Ref;
 import com.intellij.openapi.util.registry.Registry;
 import com.intellij.openapi.util.registry.RegistryValue;
 import com.intellij.openapi.vfs.VirtualFile;
 import com.intellij.openapi.vfs.encoding.EncodingProjectManager;
 import com.intellij.testFramework.JavaPsiTestCase;
 import com.intellij.testFramework.PsiTestUtil;
+import com.intellij.util.ArrayUtil;
 import org.jetbrains.annotations.NotNull;

 import java.io.IOException;
@@ -51,6 +54,40 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
    assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
  }

+  @SuppressWarnings({"TextBlockMigration", "NonAsciiCharacters"})
+  public void testPropertiesAutoEncoding() throws IOException {
+    // ASCII-only content is valid UTF-8, so the file is expected to be detected as UTF-8
+    final Ref<byte[]> content = Ref.create();
+    final VirtualFile file = createFile("test.properties");
+    WriteAction.run(() -> {
+      content.set(("one=1\n" +
+                   "two=2\n").getBytes(StandardCharsets.ISO_8859_1));
+      file.setBinaryContent(content.get());
+    });
+    file.setCharset(null);
+    assertEquals(StandardCharsets.UTF_8, file.getCharset());
+
+    // Append UTF-8 encoded non-ASCII characters: the content still decodes as UTF-8
+    WriteAction.run(() -> {
+      content.set(ArrayUtil.mergeArrays(content.get(), ("three=3️⃣\n" +
+                                                        "four=4️⃣\n").getBytes(StandardCharsets.UTF_8)));
+      file.setBinaryContent(content.get());
+    });
+    file.setCharset(null);
+    assertEquals(StandardCharsets.UTF_8, file.getCharset());
+
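+    // Append ISO-8859-1 encoded non-ASCII characters: the bytes are no longer valid UTF-8, so ISO-8859-1 is expected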
+    WriteAction.run(() -> {
+      content.set(ArrayUtil.mergeArrays(content.get(), ("five=fünf\n" +
+                                                        "six=sechs\n").getBytes(StandardCharsets.ISO_8859_1)));
+
+      file.setBinaryContent(content.get());
+    });
+    file.setCharset(null);
+    assertEquals(StandardCharsets.ISO_8859_1, file.getCharset());
+  }
+
+
  public void testPropertiesEncodingFeatureFlagTest() {
    RegistryValue registryValue = Registry.get("properties.file.encoding.legacy.support");
    try {
@@ -60,7 +97,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
      EncodingProjectManager.getInstance(myProject).setEncoding(file, WINDOWS_1251);

      assertSameElements(getService().getAllModuleEncodings(myModule), getProjectDefault());
-    } finally {
+    }
+    finally {
      registryValue.resetToDefault();
    }
  }
@@ -101,7 +139,8 @@ public class CompilerEncodingServiceTest extends JavaPsiTestCase {
      EncodingProjectManager.getInstance(myProject).setEncoding(fileB, WINDOWS_1252);

      assertSameElements(getService().getAllModuleEncodings(myModule), projectDefaultPlus(WINDOWS_1251));
-    } finally {
+    }
+    finally {
      registryValue.resetToDefault();
    }
  }
--- a/platform/core-api/api-dump-unreviewed.txt
+++ b/platform/core-api/api-dump-unreviewed.txt
@@ -1896,6 +1896,7 @@ f:com.intellij.openapi.fileTypes.CharsetUtil
 - <init>():V
 - s:extractCharsetFromFileContent(com.intellij.openapi.project.Project,com.intellij.openapi.vfs.VirtualFile,com.intellij.openapi.fileTypes.FileType,java.lang.CharSequence):java.nio.charset.Charset
 - s:findUnmappableCharacters(java.lang.CharSequence,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
+- s:findUnmappableCharacters(java.nio.ByteBuffer,java.nio.charset.Charset):com.intellij.openapi.util.TextRange
 com.intellij.openapi.fileTypes.DirectoryFileType
 - com.intellij.openapi.fileTypes.FileType
 com.intellij.openapi.fileTypes.FileType
--- a/platform/core-api/src/com/intellij/openapi/fileTypes/CharsetUtil.java
+++ b/platform/core-api/src/com/intellij/openapi/fileTypes/CharsetUtil.java
@@ -2,7 +2,6 @@
 package com.intellij.openapi.fileTypes;

 import com.intellij.openapi.project.Project;
-import com.intellij.openapi.util.Ref;
 import com.intellij.openapi.util.TextRange;
 import com.intellij.openapi.util.text.StringUtil;
 import com.intellij.openapi.vfs.VirtualFile;
@@ -48,40 +47,22 @@ public final class CharsetUtil {
   * @return a {@code TextRange} representing the range of unmappable characters, or {@code null} if all characters can be mapped
   */
  public static @Nullable TextRange findUnmappableCharacters(@Nullable CharSequence text, @NotNull Charset charset) {
-    if(text == null || text.length() == 0) return null;
-    return findUnmappableRange(CharBuffer.wrap(text), Ref.create(), CharBuffer.allocate(text.length()), charset);
-  }
+    if (text == null || text.length() == 0) return null;

-  /**
-   * Identifies the range of characters that either fail to encode or decode properly with the specified charset.
-   *
-   * @param inputBuffer  the input character buffer to be checked
-   * @param encodedBufferRef a reference to the output byte buffer for storing encoded bytes
-   * @param decodedBuffer a character buffer to hold the decoded characters
-   * @param charset the charset used for encoding and decoding
-   * @return a {@code TextRange} object representing the range of unmappable characters, or {@code null} if all characters are mappable
-   */
-  private static @Nullable TextRange findUnmappableRange(@NotNull CharBuffer inputBuffer,
-                                                         @NotNull Ref<ByteBuffer> encodedBufferRef,
-                                                         @NotNull CharBuffer decodedBuffer,
-                                                         @NotNull Charset charset) {
+    CharBuffer inputBuffer = text instanceof CharBuffer ? (CharBuffer)text : CharBuffer.wrap(text);
    CharsetEncoder encoder = charset.newEncoder()
      .onUnmappableCharacter(CodingErrorAction.REPORT)
      .onMalformedInput(CodingErrorAction.REPORT);
-    int remainingChars = inputBuffer.limit();

-    ByteBuffer encodedBuffer = encodedBufferRef.get();
-    if (encodedBuffer == null) {
-      encodedBufferRef.set(encodedBuffer = ByteBuffer.allocate((int)(encoder.averageBytesPerChar() * remainingChars)));
-    }
-    encodedBuffer.rewind();
-    encodedBuffer.limit(encodedBuffer.capacity());
+    int remainingChars = inputBuffer.remaining();
+    ByteBuffer encodedBuffer = ByteBuffer.allocate((int)(encoder.maxBytesPerChar() * remainingChars));
    inputBuffer.rewind();
-    inputBuffer.position(0);
+    encodedBuffer.clear();
+
    CoderResult encodeResult;

    while (true) {
-      encodeResult = inputBuffer.hasRemaining() ? encoder.encode(inputBuffer, encodedBuffer, true) : CoderResult.UNDERFLOW;
+      encodeResult = encoder.encode(inputBuffer, encodedBuffer, true);  // Encode the characters
      if (encodeResult.isUnderflow()) {
        encodeResult = encoder.flush(encodedBuffer);
      }
@@ -89,27 +70,22 @@ public final class CharsetUtil {
        break;
      }

-      ByteBuffer tempBuffer = ByteBuffer.allocate(3 * encodedBuffer.capacity() / 2 + 1);
+      ByteBuffer tempBuffer = ByteBuffer.allocate(2 * encodedBuffer.capacity());
      encodedBuffer.flip();
      tempBuffer.put(encodedBuffer);
-      encodedBufferRef.set(encodedBuffer = tempBuffer);
+      encodedBuffer = tempBuffer;
    }

    if (encodeResult.isError()) {
      return TextRange.from(inputBuffer.position(), encodeResult.length());
    }

-    int encodedLength = encodedBuffer.position();
-    CharsetDecoder decoder = charset.newDecoder()
-      .onUnmappableCharacter(CodingErrorAction.REPORT)
-      .onMalformedInput(CodingErrorAction.REPORT);
-    encodedBuffer.rewind();
-    encodedBuffer.limit(encodedLength);
-    decodedBuffer.rewind();
+    encodedBuffer.flip();
+    CharBuffer decodedBuffer = CharBuffer.allocate(encodedBuffer.remaining());
+    TextRange range = findUnmappableRange(encodedBuffer, charset, decodedBuffer);

-    CoderResult decodeResult = decoder.decode(encodedBuffer, decodedBuffer, true);
-    if (decodeResult.isError()) {
-      return TextRange.from(decodedBuffer.position(), decodeResult.length());
+    if (range != null) {
+      return range;
    }

    if (decodedBuffer.position() != remainingChars) {
@@ -117,9 +93,48 @@ public final class CharsetUtil {
    }

    inputBuffer.rewind();
-    inputBuffer.position(0);
    decodedBuffer.rewind();
    int commonPrefixLength = StringUtil.commonPrefixLength(inputBuffer, decodedBuffer);
+
    return commonPrefixLength == remainingChars ? null : TextRange.from(commonPrefixLength, 1);
  }
+
+  /**
+   * Checks if the given byte buffer contains unmappable characters for the specified charset.
+   *
+   * @param byteBuffer the byte buffer to be checked
+   * @param charset    the charset to be used for decoding
+   * @return a {@code TextRange} representing the range of unmappable characters, or {@code null} if all characters can be mapped
+   */
+  public static @Nullable TextRange findUnmappableCharacters(@NotNull ByteBuffer byteBuffer, @NotNull Charset charset) {
+    return findUnmappableRange(byteBuffer, charset, CharBuffer.allocate(byteBuffer.remaining()));
+  }
+
+  /**
+   * Identifies the range of unmappable characters in the byte buffer during decoding with the specified charset.
+   *
+   * @param byteBuffer    the input byte buffer to be checked
+   * @param charset       the charset used for decoding
+   * @param decodedBuffer the buffer to store the result of decoding; must have enough capacity to hold the decoded characters
+   * @return a {@code TextRange} object representing the range of unmappable characters, or {@code null} if all characters are mappable
+   */
+  private static @Nullable TextRange findUnmappableRange(@NotNull ByteBuffer byteBuffer,
+                                                         @NotNull Charset charset,
+                                                         @NotNull CharBuffer decodedBuffer) {
+    CharsetDecoder decoder = charset.newDecoder()
+      .onUnmappableCharacter(CodingErrorAction.REPORT)
+      .onMalformedInput(CodingErrorAction.REPORT);
+
+    CoderResult result = decoder.decode(byteBuffer, decodedBuffer, true);
+    if (result.isError()) {
+      return TextRange.from(byteBuffer.position(), result.length());
+    }
+
+    result = decoder.flush(decodedBuffer);
+    if (result.isError()) {
+      return TextRange.from(byteBuffer.position(), result.length());
+    }
+
+    return null;
+  }
 }
--- a/plugins/properties/properties-psi-api/src/com/intellij/lang/properties/PropertiesFileType.java
+++ b/plugins/properties/properties-psi-api/src/com/intellij/lang/properties/PropertiesFileType.java
@@ -2,6 +2,7 @@
 package com.intellij.lang.properties;

 import com.intellij.lang.properties.charset.Native2AsciiCharset;
+import com.intellij.openapi.fileTypes.CharsetUtil;
 import com.intellij.openapi.fileTypes.LanguageFileType;
 import com.intellij.openapi.util.registry.Registry;
 import com.intellij.openapi.vfs.VirtualFile;
@@ -11,6 +12,7 @@ import com.intellij.ui.PlatformIcons;
 import org.jetbrains.annotations.NotNull;

 import javax.swing.*;
+import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;

@@ -48,6 +50,11 @@ public final class PropertiesFileType extends LanguageFileType {
    Charset charset = EncodingRegistry.getInstance().getDefaultCharsetForPropertiesFiles(file);
    if (charset == null) {
      charset = getDefaultCharset();
+      if (content.length > 0 &&
+          StandardCharsets.UTF_8.equals(charset) &&
+          CharsetUtil.findUnmappableCharacters(ByteBuffer.wrap(content), StandardCharsets.UTF_8) != null) {
+        charset = StandardCharsets.ISO_8859_1;
+      }
    }
    if (EncodingRegistry.getInstance().isNative2Ascii(file)) {
      charset = Native2AsciiCharset.wrap(charset);
@@ -56,9 +63,10 @@ public final class PropertiesFileType extends LanguageFileType {
  }

  public @NotNull Charset getDefaultCharset() {
-    if(Registry.is("properties.file.encoding.legacy.support", false)) {
+    if (Registry.is("properties.file.encoding.legacy.support", false)) {
      return StandardCharsets.ISO_8859_1;
-    } else {
+    }
+    else {
      return StandardCharsets.UTF_8;
    }
  }