Non-ASCII characters inspection

This commit is contained in:
Alexey Kudravtsev
2017-05-05 13:55:17 +03:00
parent 3c2d0f71c1
commit 759648df75
7 changed files with 426 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
// Highlighting fixture for the non-ASCII characters inspection. The inline warning tags presumably mark the
// ranges and messages the test expects (TODO confirm). Keep every comment in this file ASCII-only.
class X {
int <warning descr="Non-ASCII characters in an identifier">Ж</warning> = 0;
class <warning descr="Identifier contains symbols from different languages: [LATIN, CYRILLIC]"><warning descr="Non-ASCII characters in an identifier">InnerП</warning></warning> {}
// comment<warning descr="Non-ASCII characters in a comment">жп</warning> 234
String s = "12<warning descr="Non-ASCII characters in a string literal">л</warning>3<warning descr="Non-ASCII characters in a string literal">орыва</warning>0";
void <warning descr="Non-ASCII characters in an identifier">жжж</warning>() {
жжж();
}
}

View File

@@ -0,0 +1,56 @@
/*
* Copyright 2000-2017 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Created by IntelliJ IDEA.
* User: max
* Date: Apr 11, 2002
* Time: 7:51:16 PM
* To change template for new class use
* Code Style | Class Templates options (Tools | IDE Options).
*/
package com.intellij.codeInsight.daemon;
import com.intellij.codeInspection.NonAsciiCharactersInspection;
import com.intellij.codeInspection.LocalInspectionTool;
import com.intellij.util.ui.UIUtil;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
public class NonAsciiCharactersTest extends DaemonAnalyzerTestCase {
@NonNls private static final String BASE_PATH = "/codeInsight/daemonCodeAnalyzer/nonAsciiCharacters";
@NotNull
@Override
protected LocalInspectionTool[] configureLocalInspectionTools() {
NonAsciiCharactersInspection inspection = new NonAsciiCharactersInspection();
inspection.CHECK_FOR_DIFFERENT_LANGUAGES_IN_IDENTIFIER_NAME = true;
inspection.CHECK_FOR_NOT_ASCII_IDENTIFIER_NAME = true;
inspection.CHECK_FOR_NOT_ASCII_COMMENT = true;
inspection.CHECK_FOR_NOT_ASCII_STRING_LITERAL = true;
inspection.CHECK_FOR_FILES_CONTAINING_BOM = true;
return new LocalInspectionTool[]{inspection};
}
private void doTest() throws Exception {
doTest(BASE_PATH + "/" + getTestName(false)+".java", true, false);
UIUtil.dispatchAllInvocationEvents();
}
public void testSimple() throws Exception {
doTest();
}
}

View File

@@ -0,0 +1,111 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Options panel bound to com.intellij.codeInspection.NonAsciiCharactersInspection.
  Rows 1-5 each hold one check box (column 0) and a label with an example of what the check reports (column 1).
-->
<form xmlns="http://www.intellij.com/uidesigner/form/" version="1" bind-to-class="com.intellij.codeInspection.NonAsciiCharactersInspection">
  <grid id="27dc6" binding="myPanel" layout-manager="GridLayoutManager" row-count="7" column-count="2" same-size-horizontally="false" same-size-vertically="false" hgap="-1" vgap="-1">
    <margin top="0" left="0" bottom="0" right="0"/>
    <constraints>
      <xy x="20" y="20" width="640" height="400"/>
    </constraints>
    <properties/>
    <border type="none"/>
    <children>
      <component id="be8a8" class="com.intellij.ui.TitledSeparator">
        <constraints>
          <grid row="0" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="3" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Warn of:"/>
        </properties>
      </component>
      <hspacer id="9623b">
        <constraints>
          <grid row="0" column="1" row-span="1" col-span="1" vsize-policy="1" hsize-policy="6" anchor="0" fill="1" indent="0" use-parent-layout="false"/>
        </constraints>
      </hspacer>
      <vspacer id="8f7a7">
        <constraints>
          <grid row="6" column="0" row-span="1" col-span="1" vsize-policy="6" hsize-policy="1" anchor="0" fill="2" indent="0" use-parent-layout="false"/>
        </constraints>
      </vspacer>
      <component id="d43de" class="com.intellij.ui.components.JBCheckBox" binding="myASCIIIdentifiers">
        <constraints>
          <grid row="1" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="1" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Non-ASCII characters in identifiers"/>
        </properties>
      </component>
      <component id="e481e" class="com.intellij.ui.components.JBCheckBox" binding="myASCIIComments">
        <constraints>
          <grid row="2" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="1" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Non-ASCII characters in comments"/>
        </properties>
      </component>
      <component id="3c6a" class="com.intellij.ui.components.JBCheckBox" binding="myASCIIStringLiterals">
        <constraints>
          <grid row="3" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="1" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Non-ASCII characters in strings"/>
        </properties>
      </component>
      <component id="21230" class="com.intellij.ui.components.JBCheckBox" binding="myAlienIdentifiers">
        <constraints>
          <grid row="4" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="1" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Different languages in identifiers"/>
        </properties>
      </component>
      <component id="92c40" class="com.intellij.ui.components.JBCheckBox" binding="myFilesContainingBOM">
        <constraints>
          <grid row="5" column="0" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="1" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="Files containing BOM"/>
        </properties>
      </component>
      <component id="539c6" class="com.intellij.ui.components.JBLabel">
        <constraints>
          <grid row="1" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="&lt;html&gt;E.g. &lt;code&gt;&quot;int Поле = 0;&quot;&lt;/code&gt;"/>
        </properties>
      </component>
      <component id="9077a" class="com.intellij.ui.components.JBLabel">
        <constraints>
          <grid row="2" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="&lt;html&gt;E.g. &lt;code&gt;&quot;// hello გენაცვალი&quot;&lt;/code&gt;"/>
        </properties>
      </component>
      <component id="8797f" class="com.intellij.ui.components.JBLabel">
        <constraints>
          <grid row="3" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="&lt;html&gt;E.g. &lt;code&gt;&quot;println(&quot;ษ ฤๅษี&quot;)&quot;&lt;/code&gt;"/>
        </properties>
      </component>
      <component id="394d1" class="com.intellij.ui.components.JBLabel">
        <constraints>
          <grid row="4" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="&lt;html&gt;E.g. &lt;code&gt;&quot;void printЕрунда()&quot;&lt;/code&gt;"/>
        </properties>
      </component>
      <component id="39165" class="com.intellij.ui.components.JBLabel">
        <constraints>
          <grid row="5" column="1" row-span="1" col-span="1" vsize-policy="0" hsize-policy="0" anchor="8" fill="0" indent="0" use-parent-layout="false"/>
        </constraints>
        <properties>
          <text value="&lt;html&gt;E.g. files starting with &lt;code&gt;FEFF&lt;/code&gt;"/>
        </properties>
      </component>
    </children>
  </grid>
</form>

View File

@@ -0,0 +1,228 @@
/*
* Copyright 2000-2017 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intellij.codeInspection;
import com.intellij.lang.injection.InjectedLanguageManager;
import com.intellij.lang.properties.charset.Native2AsciiCharset;
import com.intellij.openapi.fileEditor.impl.LoadTextUtil;
import com.intellij.openapi.util.TextRange;
import com.intellij.openapi.vfs.CharsetToolkit;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.psi.*;
import com.intellij.ui.components.JBCheckBox;
import com.intellij.util.ReflectionUtil;
import com.intellij.util.io.IOUtil;
import gnu.trove.THashMap;
import org.jetbrains.annotations.Nls;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import javax.swing.*;
import java.nio.charset.Charset;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
 * Inspection that reports non-ASCII characters in identifiers, comments and literals, identifiers that mix
 * several Unicode scripts, and files that carry a byte order mark (BOM).
 * <p>
 * Every check is switched by one of the public {@code CHECK_FOR_*} flags. The Swing fields ({@code myPanel} and
 * the check boxes) are never assigned in this class; presumably they are created from the UI Designer form
 * bound to this class before the constructor body runs (TODO confirm).
 */
public class NonAsciiCharactersInspection extends LocalInspectionTool {
  public boolean CHECK_FOR_NOT_ASCII_IDENTIFIER_NAME = true;
  public boolean CHECK_FOR_NOT_ASCII_STRING_LITERAL;
  public boolean CHECK_FOR_NOT_ASCII_COMMENT;
  public boolean CHECK_FOR_DIFFERENT_LANGUAGES_IN_IDENTIFIER_NAME = true;
  public boolean CHECK_FOR_FILES_CONTAINING_BOM;

  private JBCheckBox myASCIIIdentifiers;
  private JBCheckBox myASCIIComments;
  private JBCheckBox myASCIIStringLiterals;
  private JBCheckBox myAlienIdentifiers;
  private JPanel myPanel;
  private JBCheckBox myFilesContainingBOM;

  /** Check box -> name of the boolean field of this class it edits. */
  private final Map<JCheckBox, String> myBindings = new THashMap<>();

  /** Wires every check box of the options panel to the flag it controls. */
  public NonAsciiCharactersInspection() {
    bind(myASCIIIdentifiers, "CHECK_FOR_NOT_ASCII_IDENTIFIER_NAME");
    bind(myASCIIStringLiterals, "CHECK_FOR_NOT_ASCII_STRING_LITERAL");
    bind(myASCIIComments, "CHECK_FOR_NOT_ASCII_COMMENT");
    bind(myAlienIdentifiers, "CHECK_FOR_DIFFERENT_LANGUAGES_IN_IDENTIFIER_NAME");
    bind(myFilesContainingBOM, "CHECK_FOR_FILES_CONTAINING_BOM");
  }

  @Override
  @Nls
  @NotNull
  public String getGroupDisplayName() {
    return InspectionsBundle.message("group.names.internationalization.issues");
  }

  @Override
  @Nls
  @NotNull
  public String getDisplayName() {
    return InspectionsBundle.message("non.ascii.characters");
  }

  @Override
  @NonNls
  @NotNull
  public String getShortName() {
    return "NonAsciiCharacters";
  }

  /**
   * Builds the visitor that performs the enabled checks.
   *
   * @return {@link PsiElementVisitor#EMPTY_VISITOR} when {@link #isFileWorthIt} rejects the session file,
   * otherwise a visitor that checks every visited element and, in {@code visitFile}, the BOM of the file
   */
  @NotNull
  @Override
  public PsiElementVisitor buildVisitor(@NotNull ProblemsHolder holder,
                                        boolean isOnTheFly,
                                        @NotNull LocalInspectionToolSession session) {
    if (!isFileWorthIt(session.getFile())) return PsiElementVisitor.EMPTY_VISITOR;
    return new PsiElementVisitor() {
      @Override
      public void visitElement(PsiElement element) {
        if (CHECK_FOR_NOT_ASCII_IDENTIFIER_NAME || CHECK_FOR_DIFFERENT_LANGUAGES_IN_IDENTIFIER_NAME) {
          PsiElement parent = element.getParent();
          // only the name identifier of a declaration is checked, not every usage of the name
          if (parent instanceof PsiNameIdentifierOwner && ((PsiNameIdentifierOwner)parent).getNameIdentifier() == element) {
            String text = element.getText();
            if (CHECK_FOR_NOT_ASCII_IDENTIFIER_NAME) {
              checkAscii(element, text, holder, "an identifier");
            }
            if (CHECK_FOR_DIFFERENT_LANGUAGES_IN_IDENTIFIER_NAME) {
              checkSameLanguage(element, text, holder);
            }
          }
        }
        if (CHECK_FOR_NOT_ASCII_COMMENT) {
          if (element instanceof PsiComment) {
            checkAsciiRange(element, element.getText(), holder, "a comment");
          }
        }
        if (CHECK_FOR_NOT_ASCII_STRING_LITERAL) {
          if (element instanceof PsiLiteralValue) {
            checkAsciiRange(element, element.getText(), holder, "a string literal");
          }
        }
      }

      @Override
      public void visitFile(PsiFile file) {
        super.visitFile(file);
        if (CHECK_FOR_FILES_CONTAINING_BOM) {
          VirtualFile virtualFile = file.getVirtualFile();
          byte[] bom = virtualFile == null ? null : virtualFile.getBOM();
          if (bom != null) {
            String hex = IntStream.range(0, bom.length)
              // always two digits per byte: a byte below 0x10 must keep its leading zero
              .mapToObj(i -> String.format("%02X", bom[i] & 0xFF))
              .collect(Collectors.joining());
            Charset charsetFromBOM = CharsetToolkit.guessFromBOM(bom);
            holder.registerProblem(file, "File contains BOM: '" + hex + "'" +
                                         (charsetFromBOM == null ? "" : " (charset '" + charsetFromBOM.displayName() + "' signature)"),
                                   ProblemHighlightType.GENERIC_ERROR_OR_WARNING);
          }
        }
      }
    };
  }

  /**
   * @return {@code false} for injected fragments, for files without a virtual file and for files whose charset,
   * as extracted from the content, is {@link Native2AsciiCharset}; {@code true} otherwise
   */
  private static boolean isFileWorthIt(PsiFile file) {
    if (InjectedLanguageManager.getInstance(file.getProject()).isInjectedFragment(file)) return false;
    VirtualFile virtualFile = file.getVirtualFile();
    if (virtualFile == null) return false;
    CharSequence text = file.getViewProvider().getContents();
    Charset charset = LoadTextUtil.extractCharsetFromFileContent(file.getProject(), virtualFile, text);
    // no sense in checking transparently decoded file: all characters there are already safely encoded
    return !(charset instanceof Native2AsciiCharset);
  }

  /**
   * Reports {@code element} when {@code text} contains code points from more than one Unicode script.
   * Code points of the {@code COMMON} and {@code INHERITED} scripts are ignored: they do not belong to a
   * particular script (combining marks, for instance, are {@code INHERITED}).
   */
  private static void checkSameLanguage(PsiElement element,
                                        String text,
                                        ProblemsHolder holder) {
    // EnumSet iterates in enum declaration order, which keeps the list in the message stable
    Set<Character.UnicodeScript> scripts = text.codePoints()
      .mapToObj(Character.UnicodeScript::of)
      .filter(script -> script != Character.UnicodeScript.COMMON && script != Character.UnicodeScript.INHERITED)
      .collect(Collectors.toCollection(() -> EnumSet.noneOf(Character.UnicodeScript.class)));
    if (scripts.size() > 1) {
      holder.registerProblem(element, "Identifier contains symbols from different languages: " + scripts,
                             ProblemHighlightType.GENERIC_ERROR_OR_WARNING);
    }
  }

  /**
   * Reports the whole {@code element} once when {@code text} is not pure ASCII.
   *
   * @param where human-readable location appended to the message, e.g. "an identifier"
   */
  private static void checkAscii(PsiElement element,
                                 String text,
                                 ProblemsHolder holder,
                                 String where) {
    if (!IOUtil.isAscii(text)) {
      holder.registerProblem(element, "Non-ASCII characters in " + where, ProblemHighlightType.GENERIC_ERROR_OR_WARNING);
    }
  }

  /**
   * Reports every maximal run of chars with a code of 128 or above in {@code text} as a separate problem.
   * The reported ranges are offsets into {@code text}.
   *
   * @param where human-readable location appended to the message, e.g. "a comment"
   */
  private static void checkAsciiRange(PsiElement element,
                                      String text,
                                      ProblemsHolder holder,
                                      String where) {
    int errorCount = 0;
    int start = -1; // start offset of the current non-ASCII run, -1 when outside of a run
    // i deliberately runs up to text.length() inclusive so that a run reaching the end of the text is flushed
    for (int i = 0; i <= text.length(); i++) {
      char c = i >= text.length() ? 0 : text.charAt(i);
      if (i == text.length() || c < 128) {
        if (start != -1) {
          TextRange range = new TextRange(start, i);
          String message = "Non-ASCII characters in " + where;
          holder.registerProblem(element, range, message);
          start = -1;
          //do not report too many errors
          if (errorCount++ > 200) break;
        }
      }
      else if (start == -1) {
        start = i;
      }
    }
  }

  /** Re-reads all flags into the check boxes and returns the options panel. */
  @Nullable
  @Override
  public JComponent createOptionsPanel() {
    reset();
    return myPanel;
  }

  /**
   * Remembers the binding, initializes the check box from the field named {@code property} and installs a
   * listener that writes the check box state back into that field via reflection.
   */
  private void bind(JCheckBox checkBox, String property) {
    myBindings.put(checkBox, property);
    reset(checkBox, property);
    checkBox.addChangeListener(__ -> {
      boolean selected = checkBox.isSelected();
      ReflectionUtil.setField(getClass(), this, boolean.class, property, selected);
    });
  }

  /** Sets the check box selection from the current value of the boolean field named {@code property}. */
  private void reset(JCheckBox checkBox, String property) {
    checkBox.setSelected(ReflectionUtil.getField(getClass(), this, boolean.class, property));
  }

  /** Refreshes every bound check box from its field. */
  private void reset() {
    for (Map.Entry<JCheckBox, String> entry : myBindings.entrySet()) {
      JCheckBox checkBox = entry.getKey();
      String property = entry.getValue();
      reset(checkBox, property);
    }
  }
}

View File

@@ -0,0 +1,18 @@
<html>
<body>
This inspection warns you about code that was accidentally written in the wrong encoding.<br>
For example, <br>
<ul>
<li>Non-ASCII characters used in the names/strings/comments or</li>
<li>Identifiers written in different languages
(e.g. when in the variable "<code>my<span color="red">C</span>ollection</code>"
the letter <code><span color="red">C</span></code> is written in Cyrillic) or</li>
<li>Unicode symbols used in comments or strings
(e.g. long dashes and arrows in the comment
"<code>// a <span color="red">&rarr;</span> !a <span color="red">&mdash;</span> please note</code>")</li>
</ul>
</body>
</html>

View File

@@ -714,6 +714,7 @@ cleanup.in.scope=Cleanup code on...
severities.default.settings.message=Edit Settings|Colors \\& Fonts
unsupported.character.for.the.charset=Unsupported characters for the charset ''{0}''
lossy.encoding=Lossy encoding
non.ascii.characters=Non-ASCII characters
inspection.duplicated.code.display.name=Duplicated Code
inconsistent.line.separators=Inconsistent line separators
inspection.unused.symbol.public.method.parameters=Check parameters in &Non-private methods

View File

@@ -831,6 +831,9 @@
<localInspection shortName="LossyEncoding" bundle="messages.InspectionsBundle" key="lossy.encoding"
groupKey="group.names.internationalization.issues" enabledByDefault="true" level="WARNING"
implementationClass="com.intellij.codeInspection.LossyEncodingInspection"/>
<localInspection shortName="NonAsciiCharacters" bundle="messages.InspectionsBundle" key="non.ascii.characters"
groupKey="group.names.internationalization.issues" enabledByDefault="true" level="WARNING"
implementationClass="com.intellij.codeInspection.NonAsciiCharactersInspection"/>
<localInspection shortName="ProblematicWhitespace" bundle="messages.InspectionsBundle" key="problematic.whitespace.display.name"
groupKey="inspection.general.tools.group.name" enabledByDefault="false" level="WARNING"
implementationClass="com.intellij.codeInspection.ProblematicWhitespaceInspection"/>