Moved Unicode highlighting to the annotator and added a few tests; linked LanguageLevel to FutureFeature.

This commit is contained in:
Dmitry Cheryasov
2011-05-06 18:09:17 +04:00
parent dca0b1a7ad
commit 93b2cd0508
8 changed files with 195 additions and 28 deletions

View File

@@ -6,10 +6,8 @@ import com.intellij.openapi.editor.HighlighterColors;
import com.intellij.openapi.editor.colors.TextAttributesKey;
import com.intellij.openapi.editor.markup.TextAttributes;
import com.intellij.openapi.fileTypes.SyntaxHighlighterBase;
import com.intellij.psi.StringEscapesTokenTypes;
import com.intellij.psi.tree.IElementType;
import com.jetbrains.python.PyTokenTypes;
import com.jetbrains.python.lexer.PyStringLiteralLexer;
import com.jetbrains.python.lexer.PythonHighlightingLexer;
import com.jetbrains.python.psi.LanguageLevel;
import org.jetbrains.annotations.NotNull;
@@ -29,12 +27,7 @@ public class PyHighlighter extends SyntaxHighlighterBase {
/**
 * Creates the lexer used for syntax highlighting: a LayeredLexer wrapped around a
 * PythonHighlightingLexer built for this highlighter's language level.
 * NOTE(review): this hunk is rendered without +/- markers; the 'ret' / registerSelfStoppingLayer lines
 * look like the removed version and the final single return like the added one -- confirm against the real file.
 */
@NotNull
public Lexer getHighlightingLexer() {
LayeredLexer ret = new LayeredLexer(new PythonHighlightingLexer(myLanguageLevel));
ret.registerSelfStoppingLayer(
new PyStringLiteralLexer(PyTokenTypes.STRING_LITERAL, myLanguageLevel.isPy3K()),
new IElementType[]{PyTokenTypes.STRING_LITERAL}, IElementType.EMPTY_ARRAY
);
return ret;
return new LayeredLexer(new PythonHighlightingLexer(myLanguageLevel));
}
private static TextAttributesKey _copy(String name, TextAttributesKey src) {
@@ -95,7 +88,10 @@ public class PyHighlighter extends SyntaxHighlighterBase {
public static final TextAttributesKey PY_INVALID_STRING_ESCAPE = _copy("PY.INVALID_STRING_ESCAPE", INVALID_STRING_ESCAPE);
/**
* The 'heavy' constructor that initializes everything. PySyntaxHighlighterFactory caches such instances per level.
* @param languageLevel
*/
public PyHighlighter(LanguageLevel languageLevel) {
myLanguageLevel = languageLevel;
keys = new HashMap<IElementType, TextAttributesKey>();
@@ -122,10 +118,6 @@ public class PyHighlighter extends SyntaxHighlighterBase {
keys.put(PyTokenTypes.END_OF_LINE_COMMENT, PY_LINE_COMMENT);
keys.put(PyTokenTypes.BAD_CHARACTER, HighlighterColors.BAD_CHARACTER);
keys.put(StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN, PY_VALID_STRING_ESCAPE);
keys.put(StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN, PY_INVALID_STRING_ESCAPE);
keys.put(StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN, PY_INVALID_STRING_ESCAPE);
}
@NotNull

View File

@@ -5,7 +5,6 @@ import com.intellij.openapi.fileTypes.SyntaxHighlighterFactory;
import com.intellij.openapi.project.Project;
import com.intellij.openapi.vfs.VirtualFile;
import com.intellij.util.containers.FactoryMap;
import com.jetbrains.python.highlighting.PyHighlighter;
import com.jetbrains.python.psi.LanguageLevel;
import org.jetbrains.annotations.NotNull;

View File

@@ -2,11 +2,15 @@ package com.jetbrains.python.inspections;
import com.intellij.codeInspection.ProblemsHolder;
import com.intellij.psi.PsiElementVisitor;
import com.intellij.psi.PsiFile;
import com.jetbrains.python.PyBundle;
import com.jetbrains.python.psi.PyFile;
import com.jetbrains.python.psi.PyStringLiteralExpression;
import org.jetbrains.annotations.Nls;
import org.jetbrains.annotations.NotNull;
import static com.jetbrains.python.psi.FutureFeature.UNICODE_LITERALS;
/**
* @author Alexey.Ivanov
*/
@@ -32,7 +36,16 @@ public class PyByteLiteralInspection extends PyInspection {
@Override
public void visitPyStringLiteralExpression(PyStringLiteralExpression node) {
if (Character.toLowerCase(node.getText().charAt(0)) == 'b') {
PsiFile file = node.getContainingFile(); // can't cache this in the instance, alas
boolean default_bytes = false;
if (file instanceof PyFile) {
PyFile pyfile = (PyFile)file;
default_bytes = (!UNICODE_LITERALS.requiredAt(pyfile.getLanguageLevel()) &&
!pyfile.hasImportFromFuture(UNICODE_LITERALS)
);
}
char first_char = Character.toLowerCase(node.getText().charAt(0));
if (first_char == 'b' || (default_bytes && first_char != 'u')) {
String value = node.getStringValue();
int length = value.length();
for (int i = 0; i < length; ++i) {

View File

@@ -20,8 +20,8 @@ public enum FutureFeature {
;
// TODO: link it to LanguageLevel
private final String myName;
private final int myProposed;
private final int myIncluded;
private final int myOptionalVersion;
private final int myRequiredVersion;
/**
* @param name what is imported from __future__
@@ -30,21 +30,46 @@ public enum FutureFeature {
*/
FutureFeature(final @NotNull String name, final int proposed, final int included) {
myName = name;
// NOTE(review): this hunk is rendered without +/- markers; the next two assignments target
// myProposed / myIncluded, which look like the pre-rename field names -- confirm against the real file.
myProposed = proposed;
myIncluded = included;
// 'proposed' is stored as the optional version, 'included' as the required version.
myOptionalVersion = proposed;
myRequiredVersion = included;
}
/**
 * Renders the feature as the name it was constructed with.
 *
 * @return the Python importable name of the feature.
 */
@Override
public String toString() {
return this.myName;
}
public int getProposedVersion() {
return myProposed;
/**
 * Accessor for the 'optional' version of the feature.
 *
 * @return version since which the feature can be imported from __future__.
 */
public int getOptionalVersion() {
return this.myOptionalVersion;
}
public int getIncludedVersion() {
return myIncluded;
/**
 * Accessor for the 'required' version of the feature.
 *
 * @return version since which the feature is built into the language (required from the language).
 */
public int getRequiredVersion() {
return this.myRequiredVersion;
}
/**
 * Compares the version of the given level with the optional version of this feature.
 *
 * @param level language level to check; its getVersion() value is what gets compared.
 * @return true iff the feature can either be imported from __future__ at given level, or is already built-in.
 */
public boolean availableAt(LanguageLevel level) {
final int version = level.getVersion();
return myOptionalVersion <= version;
}
/**
 * Compares the version of the given level with the required version of this feature.
 *
 * @param level language level to check; its getVersion() value is what gets compared.
 * @return true iff the feature is already present (required) at given level, and there's no need to import it.
 */
public boolean requiredAt(LanguageLevel level) {
final int version = level.getVersion();
return myRequiredVersion <= version;
}
public static final FutureFeature[] ALL = {

View File

@@ -25,11 +25,11 @@ public enum LanguageLevel {
}
private final int myVersion;
private final boolean myHasWithStatement;
private final boolean myHasPrintStatement;
private final boolean mySupportsSetLiterals;
private final boolean myIsPy3K;
LanguageLevel(int version, boolean hasWithStatement, boolean hasPrintStatement, boolean supportsSetLiterals, boolean isPy3K) {
myVersion = version;
myHasWithStatement = hasWithStatement;
@@ -38,6 +38,13 @@ public enum LanguageLevel {
myIsPy3K = isPy3K;
}
/**
 * Numeric form of this language level.
 *
 * @return an int where major and minor version are represented decimally: "version 2.5" is 25.
 */
public int getVersion() {
return this.myVersion;
}
/**
 * @return the hasWithStatement flag this level was constructed with.
 */
public boolean hasWithStatement() {
return this.myHasWithStatement;
}

View File

@@ -1,6 +1,8 @@
package com.jetbrains.python.validation;
import com.intellij.ide.util.treeView.smartTree.TreeElement;
import com.intellij.lang.ASTNode;
import com.intellij.openapi.util.TextRange;
import com.intellij.psi.PsiElement;
import com.intellij.psi.PsiFile;
import com.jetbrains.python.highlighting.PyHighlighter;
@@ -11,6 +13,8 @@ import com.jetbrains.python.psi.PyStringLiteralExpression;
import org.jetbrains.annotations.NotNull;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Marks string literals as byte or Unicode.
@@ -22,6 +26,9 @@ public class UnicodeOrByteLiteralAnnotator extends PyAnnotator {
// NOTE(review): both start as null and isDefaultUnicode() tests myLanguageLevel for null, so these are
// presumably lazily filled caches -- confirm, the rest of that method is not visible in this hunk.
private LanguageLevel myLanguageLevel = null;
private Boolean myUnicodeImported = null;
// Matches the tail of a named escape beginning at its 'N': a '{', a name that starts with a letter,
// and an optional closing '}' which, when present, is part of group 1.
private static final Pattern N_ESC_PATTERN = Pattern.compile("N\\{([A-Za-z][A-Za-z_ 0-9]*\\}?)"); // N{whatever; $1 ends with '}' in correct case.
// The first entry is a literal newline character, presumably for a backslash directly followed by a line break.
private static final String ALLOWED_ESCAPES = "\nabfnNrtuUvx\\01234567"; // chars allowed after backslash
private boolean isDefaultUnicode(@NotNull PsiElement node) {
boolean ret;
if (myLanguageLevel == null) {
@@ -48,14 +55,103 @@ public class UnicodeOrByteLiteralAnnotator extends PyAnnotator {
/**
 * Annotates every string node of the literal: a Unicode string gets PY_UNICODE_STRING attributes,
 * then the node text is scanned for backslash escapes, which are marked as valid or invalid.
 * NOTE(review): this hunk is rendered without +/- markers, so some adjacent statements appear to be the
 * removed and the added version of the same line (e.g. the two first_char declarations) -- confirm against the real file.
 *
 * @param expr the string literal expression whose string nodes are annotated.
 */
public void visitPyStringLiteralExpression(PyStringLiteralExpression expr) {
List<ASTNode> literal_nodes = expr.getStringNodes();
for (ASTNode node : literal_nodes) {
CharSequence chars = node.getChars();
if (chars.length() > 0) {
char first_char = Character.toLowerCase(chars.charAt(0));
// 'start' is the node's offset; every annotation range below is start + position inside 'text'.
int start = node.getStartOffset();
CharSequence text = node.getChars();
int length = text.length();
if (length > 0) {
char first_char = Character.toLowerCase(text.charAt(0));
// Unicode when it is the default for the file or the literal starts with 'u', but never when it starts with 'b'.
// NOTE(review): only the first character is inspected; a raw-string prefix does not seem to be considered
// here -- confirm whether escapes inside raw strings should be skipped.
boolean is_unicode = isDefaultUnicode(expr);
is_unicode |= (first_char == 'u');
is_unicode &= (first_char != 'b');
if (is_unicode) getHolder().createInfoAnnotation(node, null).setTextAttributes(PyHighlighter.PY_UNICODE_STRING);
if (is_unicode) {
getHolder().createInfoAnnotation(node, null).setTextAttributes(PyHighlighter.PY_UNICODE_STRING);
}
// highlight escapes
Matcher n_matcher = N_ESC_PATTERN.matcher(text);
int pos = 0;
while(pos < length) {
// find a backslash
while (pos < length && text.charAt(pos) != '\\') pos += 1;
if (pos < length) {
if (pos < length-1) {
// pos is where the backslash is
char escaped_char = text.charAt(pos + 1);
if (ALLOWED_ESCAPES.indexOf(escaped_char) >= 0) {
if (escaped_char == 'x') {
int span = 4; // 4 = len("\\xNN")
checkHexEscape(start, text, length, pos, span);
}
else if (is_unicode && escaped_char == 'u') {
int span = 6; // 6 = len("\\uNNNN")
checkHexEscape(start, text, length, pos, span);
}
else if (is_unicode && escaped_char == 'U') {
int span = 10; // 10 = len("\\UNNNNNNNN")
checkHexEscape(start, text, length, pos, span);
}
else if (is_unicode && escaped_char == 'N') {
// NOTE(review): Matcher.find(int) searches forward from the given index and is not anchored to it,
// so a pattern match located further along the text could be picked up here -- confirm.
if (n_matcher.find(pos+1)) {
if (n_matcher.group(1).endsWith("}")) markAsValidEscape(start + pos, start + n_matcher.end(1));
else markAsInvalidEscape(start + pos, start + n_matcher.end(1));
}
else markAsInvalidEscape(start + pos, start + pos + 2); // 2 = len("\\N")
}
else if (escaped_char >= '0' && escaped_char <= '7') {
// Only the three-digit form is handled; nothing is marked when fewer than span+1 chars remain.
int span = 4; // 4 = len("\\ooo")
if (pos < length-span) {
int end_pos = pos+span;
if (allOctal(text, pos + 2, end_pos)) markAsValidEscape(start+pos, start+end_pos);
else markAsInvalidEscape(start+pos, start+end_pos); // XXX: too much! shorter octal escapes such as \7 are reported as invalid
}
}
else { // plain 1-char escape, unless it's Unicode-specific in byte-mode
if (is_unicode || "UuN".indexOf(escaped_char) < 0)
markAsValidEscape(start + pos, start+pos+2);
}
} // else: a non-interpreted sequence like \Q: not an error, just don't highlight
}
// else: lone backslash at EOL, we ignore it
}
// NOTE(review): advances by a single char, so the escaped character itself is examined again on the next
// pass; after an escaped backslash the second backslash would be taken as the start of a new escape -- confirm.
pos += 1;
}
}
}
}
/**
 * Checks a fixed-width hex escape (backslash, one letter, then hex digits) and marks it valid or invalid.
 *
 * @param start  offset that is added to positions inside text to build annotation ranges.
 * @param text   the literal text being scanned.
 * @param length length of text.
 * @param pos    position of the backslash inside text.
 * @param span   full length of the escape, counting the backslash and the letter.
 */
private void checkHexEscape(int start, CharSequence text, int length, int pos, int span) {
if (pos >= length-span) {
// not enough characters left: flag everything from the backslash up to, but excluding, the last char of text
markAsInvalidEscape(start+pos, start+length-1);
return;
}
final int escapeEnd = pos+span;
if (allHex(text, pos+2, escapeEnd)) {
markAsValidEscape(start+pos, start+escapeEnd);
}
else {
markAsInvalidEscape(start+pos, start+escapeEnd);
}
}
/**
 * @param text  characters to inspect.
 * @param start index of the first character to check, inclusive.
 * @param end   index to stop at, exclusive.
 * @return true iff every character in [start, end) is one of '0'..'7'; true for an empty range.
 */
private static boolean allOctal(CharSequence text, int start, int end) {
int index = start;
while (index < end) {
final char digit = text.charAt(index);
final boolean octal = '0' <= digit && digit <= '7';
if (!octal) {
return false;
}
index++;
}
return true;
}
/**
 * @param text  characters to inspect.
 * @param start index of the first character to check, inclusive.
 * @param end   index to stop at, exclusive.
 * @return true iff every character in [start, end) is a hex digit; true for an empty range.
 */
private static boolean allHex(CharSequence text, int start, int end) {
int index = start;
// stop at the first non-hex character, or at the end of the range
while (index < end && isHexDigit(text.charAt(index))) {
index++;
}
return index >= end;
}
/**
 * @param c character to test.
 * @return true iff c is one of 0-9, a-f or A-F.
 */
private static boolean isHexDigit(char c) {
return "0123456789abcdefABCDEF".indexOf(c) >= 0;
}
/**
 * Creates an info annotation over [start, end) and gives it the PY_VALID_STRING_ESCAPE attributes.
 *
 * @param start start offset of the escape.
 * @param end   end offset of the escape.
 */
private void markAsValidEscape(int start, int end) {
final TextRange escapeRange = new TextRange(start, end);
getHolder().createInfoAnnotation(escapeRange, null).setTextAttributes(PyHighlighter.PY_VALID_STRING_ESCAPE);
}
/**
 * Creates an error annotation over [start, end) with the message "Invalid escape sequence"
 * and gives it the PY_INVALID_STRING_ESCAPE attributes.
 *
 * @param start start offset of the escape.
 * @param end   end offset of the escape.
 */
private void markAsInvalidEscape(int start, int end) {
final TextRange escapeRange = new TextRange(start, end);
getHolder().createErrorAnnotation(escapeRange, "Invalid escape sequence").setTextAttributes(PyHighlighter.PY_INVALID_STRING_ESCAPE);
}
}

View File

@@ -0,0 +1,28 @@
_ = "no doc comments in this file"
z = (
"simple"
"escaped \u1234 correct"
"escaped \u123z incorrect"
"escaped \U12345678 correct"
"escaped \U1234567 incorrect"
"hex <info descr="null">\x12</info> correct"
"hex <error descr="Invalid escape sequence">\x1z</error> incorrect"
"named \N{comma} correct"
"named \N{123} incorrect"
"named \N{foo, incorrect"
"named incomplete \N{aa"
#"lone backslash \"
)
z = b"hex <info descr="null">\x12</info> correct"
z = b"hex <info descr="null">\x12</info>3 correct"
z = b"hex <error descr="Invalid escape sequence">\x1z</error> incorrect"
z = b"hex incomplete<error descr="Invalid escape sequence">\x</error>"
z = b"hex incomplete<error descr="Invalid escape sequence">\x1</error>"
z = b"one char <info descr="null">\n</info> correct"
z = b"one char \Q ignored"
z = b"octal <info descr="null">\007</info> correct"
#z = b"octal \27 correct"
#z = b"octal \7 correct"
z = b"octal <error descr="Invalid escape sequence">\008</error> incorrect"
z = b"non-octal \986 ignored"

View File

@@ -106,9 +106,11 @@ public class PythonHighlightingTest extends PyLightFixtureTestCase {
doTest(false, false);
}
/*
public void testStringEscapedOK() {
doTest();
}
*/
public void testStringMixedSeparatorsOK() { // PY-299
doTest();
@@ -164,6 +166,11 @@ public class PythonHighlightingTest extends PyLightFixtureTestCase {
doTest();
}
// Runs the highlighting test at the PYTHON25 language level, checking both warnings and infos.
public void testUnicodeOrByte25() {
final LanguageLevel level = LanguageLevel.PYTHON25;
doTest(level, true, true);
}
// ---
private void doTest(final LanguageLevel languageLevel, final boolean checkWarnings, final boolean checkInfos) {
PythonLanguageLevelPusher.setForcedLanguageLevel(myFixture.getProject(), languageLevel);
try {