Unicode highlighting moved to annotator; added a few tests; LanguageLevel linked to FutureFeature.

2026-01-06 03:21:12 +07:00 · 2011-05-06 18:09:17 +04:00
parent dca0b1a7ad
commit 93b2cd0508
8 changed files with 195 additions and 28 deletions
--- a/python/src/com/jetbrains/python/highlighting/PyHighlighter.java
+++ b/python/src/com/jetbrains/python/highlighting/PyHighlighter.java
@@ -6,10 +6,8 @@ import com.intellij.openapi.editor.HighlighterColors;
 import com.intellij.openapi.editor.colors.TextAttributesKey;
 import com.intellij.openapi.editor.markup.TextAttributes;
 import com.intellij.openapi.fileTypes.SyntaxHighlighterBase;
-import com.intellij.psi.StringEscapesTokenTypes;
 import com.intellij.psi.tree.IElementType;
 import com.jetbrains.python.PyTokenTypes;
-import com.jetbrains.python.lexer.PyStringLiteralLexer;
 import com.jetbrains.python.lexer.PythonHighlightingLexer;
 import com.jetbrains.python.psi.LanguageLevel;
 import org.jetbrains.annotations.NotNull;
@@ -29,12 +27,7 @@ public class PyHighlighter extends SyntaxHighlighterBase {

  @NotNull
  public Lexer getHighlightingLexer() {
-    LayeredLexer ret = new LayeredLexer(new PythonHighlightingLexer(myLanguageLevel));
-    ret.registerSelfStoppingLayer(
-      new PyStringLiteralLexer(PyTokenTypes.STRING_LITERAL, myLanguageLevel.isPy3K()),
-      new IElementType[]{PyTokenTypes.STRING_LITERAL}, IElementType.EMPTY_ARRAY
-    );
-    return ret;
+    return new LayeredLexer(new PythonHighlightingLexer(myLanguageLevel));
  }

  private static TextAttributesKey _copy(String name, TextAttributesKey src) {
@@ -95,7 +88,10 @@ public class PyHighlighter extends SyntaxHighlighterBase {

  public static final TextAttributesKey PY_INVALID_STRING_ESCAPE = _copy("PY.INVALID_STRING_ESCAPE", INVALID_STRING_ESCAPE);

-
+  /**
+   * The 'heavy' constructor that initializes everything. PySyntaxHighlighterFactory caches such instances per level.
+   * @param languageLevel the Python language level this highlighter instance is configured for
+   */
  public PyHighlighter(LanguageLevel languageLevel) {
    myLanguageLevel = languageLevel;
    keys = new HashMap<IElementType, TextAttributesKey>();
@@ -122,10 +118,6 @@ public class PyHighlighter extends SyntaxHighlighterBase {

    keys.put(PyTokenTypes.END_OF_LINE_COMMENT, PY_LINE_COMMENT);
    keys.put(PyTokenTypes.BAD_CHARACTER, HighlighterColors.BAD_CHARACTER);
-
-    keys.put(StringEscapesTokenTypes.VALID_STRING_ESCAPE_TOKEN, PY_VALID_STRING_ESCAPE);
-    keys.put(StringEscapesTokenTypes.INVALID_CHARACTER_ESCAPE_TOKEN, PY_INVALID_STRING_ESCAPE);
-    keys.put(StringEscapesTokenTypes.INVALID_UNICODE_ESCAPE_TOKEN, PY_INVALID_STRING_ESCAPE);
  }

  @NotNull
--- a/python/src/com/jetbrains/python/highlighting/PySyntaxHighlighterFactory.java
+++ b/python/src/com/jetbrains/python/highlighting/PySyntaxHighlighterFactory.java
@@ -5,7 +5,6 @@ import com.intellij.openapi.fileTypes.SyntaxHighlighterFactory;
 import com.intellij.openapi.project.Project;
 import com.intellij.openapi.vfs.VirtualFile;
 import com.intellij.util.containers.FactoryMap;
-import com.jetbrains.python.highlighting.PyHighlighter;
 import com.jetbrains.python.psi.LanguageLevel;
 import org.jetbrains.annotations.NotNull;

--- a/python/src/com/jetbrains/python/inspections/PyByteLiteralInspection.java
+++ b/python/src/com/jetbrains/python/inspections/PyByteLiteralInspection.java
@@ -2,11 +2,15 @@ package com.jetbrains.python.inspections;

 import com.intellij.codeInspection.ProblemsHolder;
 import com.intellij.psi.PsiElementVisitor;
+import com.intellij.psi.PsiFile;
 import com.jetbrains.python.PyBundle;
+import com.jetbrains.python.psi.PyFile;
 import com.jetbrains.python.psi.PyStringLiteralExpression;
 import org.jetbrains.annotations.Nls;
 import org.jetbrains.annotations.NotNull;

+import static com.jetbrains.python.psi.FutureFeature.UNICODE_LITERALS;
+
 /**
 * @author Alexey.Ivanov
 */
@@ -32,7 +36,16 @@ public class PyByteLiteralInspection extends PyInspection {

    @Override
    public void visitPyStringLiteralExpression(PyStringLiteralExpression node) {
-      if (Character.toLowerCase(node.getText().charAt(0)) == 'b') {
+      PsiFile file = node.getContainingFile(); // can't cache this in the instance, alas
+      boolean default_bytes = false;
+      if (file instanceof PyFile) {
+        PyFile pyfile = (PyFile)file;
+        default_bytes = (!UNICODE_LITERALS.requiredAt(pyfile.getLanguageLevel()) &&
+                         !pyfile.hasImportFromFuture(UNICODE_LITERALS)
+        );
+      }
+      char first_char = Character.toLowerCase(node.getText().charAt(0));
+      if (first_char == 'b' || (default_bytes && first_char != 'u')) {
        String value = node.getStringValue();
        int length = value.length();
        for (int i = 0; i < length; ++i) {
--- a/python/src/com/jetbrains/python/psi/FutureFeature.java
+++ b/python/src/com/jetbrains/python/psi/FutureFeature.java
@@ -20,8 +20,8 @@ public enum FutureFeature {
  ;
  // TODO: link it to LanguageLevel
  private final String myName;
-  private final int myProposed;
-  private final int myIncluded;
+  private final int myOptionalVersion;
+  private final int myRequiredVersion;

  /**
   * @param name what is imported from __future__
@@ -30,21 +30,46 @@ public enum FutureFeature {
   */
  FutureFeature(final @NotNull String name, final int proposed, final int included) {
    myName = name;
-    myProposed = proposed;
-    myIncluded = included;
+    myOptionalVersion = proposed;
+    myRequiredVersion = included;
  }

+  /**
+   * @return the Python importable name of the feature.
+   */
  @Override
  public String toString() {
    return myName;
  }

-  public int getProposedVersion() {
-    return myProposed;
+  /**
+   * @return Version since which it is possible to import the feature from __future__
+   */
+  public int getOptionalVersion() {
+    return myOptionalVersion;
  }

-  public int getIncludedVersion() {
-    return myIncluded;
+  /**
+   * @return Version since which the feature is built into the language (required from the language).
+   */
+  public int getRequiredVersion() {
+    return myRequiredVersion;
+  }
+
+  /**
+   * @param level the language level whose version is compared with the feature's optional version
+   * @return true iff the feature can either be imported from __future__ at given level, or is already built-in.
+   */
+  public boolean availableAt(LanguageLevel level) {
+    return level.getVersion() >= myOptionalVersion;
+  }
+
+  /**
+   * @param level the language level whose version is compared with the feature's required version
+   * @return true iff the feature is already present (required) at given level, and there's no need to import it.
+   */
+  public boolean requiredAt(LanguageLevel level) {
+    return level.getVersion() >= myRequiredVersion;
  }

  public static final FutureFeature[] ALL = {
--- a/python/src/com/jetbrains/python/psi/LanguageLevel.java
+++ b/python/src/com/jetbrains/python/psi/LanguageLevel.java
@@ -25,11 +25,11 @@ public enum LanguageLevel {
  }

  private final int myVersion;
+
  private final boolean myHasWithStatement;
  private final boolean myHasPrintStatement;
  private final boolean mySupportsSetLiterals;
  private final boolean myIsPy3K;
-
  LanguageLevel(int version, boolean hasWithStatement, boolean hasPrintStatement, boolean supportsSetLiterals, boolean isPy3K) {
    myVersion = version;
    myHasWithStatement = hasWithStatement;
@@ -38,6 +38,13 @@ public enum LanguageLevel {
    myIsPy3K = isPy3K;
  }

+  /**
+   * @return an int where major and minor version are represented decimally: "version 2.5" is 25.
+   */
+  public int getVersion() {
+    return myVersion;
+  }
+
  public boolean hasWithStatement() {
    return myHasWithStatement;
  }
--- a/python/src/com/jetbrains/python/validation/UnicodeOrByteLiteralAnnotator.java
+++ b/python/src/com/jetbrains/python/validation/UnicodeOrByteLiteralAnnotator.java
@@ -1,6 +1,8 @@
 package com.jetbrains.python.validation;

+import com.intellij.ide.util.treeView.smartTree.TreeElement;
 import com.intellij.lang.ASTNode;
+import com.intellij.openapi.util.TextRange;
 import com.intellij.psi.PsiElement;
 import com.intellij.psi.PsiFile;
 import com.jetbrains.python.highlighting.PyHighlighter;
@@ -11,6 +13,8 @@ import com.jetbrains.python.psi.PyStringLiteralExpression;
 import org.jetbrains.annotations.NotNull;

 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;

 /**
 * Marks string literals as byte or Unicode.
@@ -22,6 +26,9 @@ public class UnicodeOrByteLiteralAnnotator extends PyAnnotator {
  private LanguageLevel myLanguageLevel = null;
  private Boolean myUnicodeImported = null;

+  private static final Pattern N_ESC_PATTERN = Pattern.compile("N\\{([A-Za-z][A-Za-z_ 0-9]*\\}?)"); // N{whatever; $1 ends with '}' in correct case.
+  private static final String ALLOWED_ESCAPES = "\nabfnNrtuUvx\\01234567"; // chars allowed after backslash
+
  private boolean isDefaultUnicode(@NotNull PsiElement node) {
    boolean ret;
    if (myLanguageLevel == null) {
@@ -48,14 +55,103 @@ public class UnicodeOrByteLiteralAnnotator extends PyAnnotator {
  public void visitPyStringLiteralExpression(PyStringLiteralExpression expr) {
    List<ASTNode> literal_nodes = expr.getStringNodes();
    for (ASTNode node : literal_nodes) {
-      CharSequence chars = node.getChars();
-      if (chars.length() > 0) {
-        char first_char = Character.toLowerCase(chars.charAt(0));
+      int start = node.getStartOffset();
+      CharSequence text = node.getChars();
+      int length = text.length();
+      if (length > 0) {
+        char first_char = Character.toLowerCase(text.charAt(0));
        boolean is_unicode = isDefaultUnicode(expr);
        is_unicode |= (first_char == 'u');
        is_unicode &= (first_char != 'b');
-        if (is_unicode) getHolder().createInfoAnnotation(node, null).setTextAttributes(PyHighlighter.PY_UNICODE_STRING);
+        if (is_unicode) {
+          getHolder().createInfoAnnotation(node, null).setTextAttributes(PyHighlighter.PY_UNICODE_STRING);
+        }
+        // highlight escapes
+        Matcher n_matcher = N_ESC_PATTERN.matcher(text);
+        int pos = 0;
+        while(pos < length) {
+          // find a backslash
+          while (pos < length && text.charAt(pos) != '\\') pos += 1;
+          if (pos < length) {
+            if (pos < length-1) {
+              // pos is where the backslash is
+              char escaped_char = text.charAt(pos + 1);
+              if (ALLOWED_ESCAPES.indexOf(escaped_char) >= 0) {
+                if (escaped_char == 'x') {
+                  int span = 4; // 4 = len("\\xNN")
+                  checkHexEscape(start, text, length, pos, span);
+                }
+                else if (is_unicode && escaped_char == 'u') {
+                  int span = 6; // 6 = len("\\uNNNN")
+                  checkHexEscape(start, text, length, pos, span);
+                }
+                else if (is_unicode && escaped_char == 'U') {
+                  int span = 10; // 10 = len("\\UNNNNNNNN")
+                  checkHexEscape(start, text, length, pos, span);
+                }
+                else if (is_unicode && escaped_char == 'N') {
+                  if (n_matcher.find(pos+1)) {
+                    if (n_matcher.group(1).endsWith("}")) markAsValidEscape(start + pos, start + n_matcher.end(1));
+                    else markAsInvalidEscape(start + pos, start + n_matcher.end(1));
+                  }
+                  else markAsInvalidEscape(start + pos, start + pos + 2); // 2 = len("\\N")
+                }
+                else if (escaped_char >= '0' && escaped_char <= '7') {
+                  int span = 4; // 4 = len("\\ooo")
+                  if (pos < length-span) {
+                    int end_pos = pos+span;
+                    if (allOctal(text, pos + 2, end_pos)) markAsValidEscape(start+pos, start+end_pos);
+                    else markAsInvalidEscape(start+pos, start+end_pos); // XXX: too much! e.g. \7 fails
+                  }
+                }
+                else { // plain 1-char escape, unless it's Unicode-specific in byte-mode
+                  if (is_unicode || "UuN".indexOf(escaped_char) < 0)
+                  markAsValidEscape(start + pos, start+pos+2);
+                }
+              } // else: a non-interpreted sequence like \Q: not an error, just don't highlight
+            }
+            // else: lone backslash at EOL, we ignore it
+          }
+          pos += 1;
+        }
+
      }
    }
  }
+
+  private void checkHexEscape(int start, CharSequence text, int length, int pos, int span) {
+    if (pos < length-span) {
+      int end_pos = pos+span;
+      if (allHex(text, pos+2, end_pos)) markAsValidEscape(start+pos, start+end_pos);
+      else markAsInvalidEscape(start+pos, start+end_pos);
+    }
+    else markAsInvalidEscape(start+pos, start+length-1);
+  }
+
+  private static boolean allOctal(CharSequence text, int start, int end) {
+    for (int i=start; i<end; i+=1) {
+      char c = text.charAt(i);
+      if (c < '0' || c > '7') return false;
+    }
+    return true;
+  }
+
+  private static boolean allHex(CharSequence text, int start, int end) {
+    for (int i=start; i<end; i+=1) {
+      if (! isHexDigit(text.charAt(i))) return false;
+    }
+    return true;
+  }
+
+  private static boolean isHexDigit(char c) {
+    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+  }
+
+  private void markAsValidEscape(int start, int end) {
+    getHolder().createInfoAnnotation(new TextRange(start, end), null).setTextAttributes(PyHighlighter.PY_VALID_STRING_ESCAPE);
+  }
+
+  private void markAsInvalidEscape(int start, int end) {
+    getHolder().createErrorAnnotation(new TextRange(start, end), "Invalid escape sequence").setTextAttributes(PyHighlighter.PY_INVALID_STRING_ESCAPE);
+  }
 }
--- a/python/testData/highlighting/unicodeOrByte25.py
+++ b/python/testData/highlighting/unicodeOrByte25.py
@@ -0,0 +1,28 @@
+_ = "no doc comments in this file"
+
+z = (
+    "simple"
+    "escaped \u1234 correct"
+    "escaped \u123z incorrect"
+    "escaped \U12345678 correct"
+    "escaped \U1234567 incorrect"
+    "hex <info descr="null">\x12</info> correct"
+    "hex <error descr="Invalid escape sequence">\x1z</error> incorrect"
+    "named \N{comma} correct"
+    "named \N{123} incorrect"
+    "named \N{foo, incorrect"
+    "named incomplete \N{aa"
+    #"lone backslash \"
+)
+z = b"hex <info descr="null">\x12</info> correct"
+z = b"hex <info descr="null">\x12</info>3 correct"
+z = b"hex <error descr="Invalid escape sequence">\x1z</error> incorrect"
+z = b"hex incomplete<error descr="Invalid escape sequence">\x</error>"
+z = b"hex incomplete<error descr="Invalid escape sequence">\x1</error>"
+z = b"one char <info descr="null">\n</info> correct"
+z = b"one char \Q ignored"
+z = b"octal <info descr="null">\007</info> correct"
+#z = b"octal \27 correct"
+#z = b"octal \7 correct"
+z = b"octal <error descr="Invalid escape sequence">\008</error> incorrect"
+z = b"non-octal \986 ignored"
--- a/python/testSrc/com/jetbrains/python/PythonHighlightingTest.java
+++ b/python/testSrc/com/jetbrains/python/PythonHighlightingTest.java
@@ -106,9 +106,11 @@ public class PythonHighlightingTest extends PyLightFixtureTestCase {
    doTest(false, false);
  }

+  /*
  public void testStringEscapedOK() {
    doTest();
  }
+  */

  public void testStringMixedSeparatorsOK() {   // PY-299
    doTest();
@@ -164,6 +166,11 @@ public class PythonHighlightingTest extends PyLightFixtureTestCase {
    doTest();
  }

+  public void testUnicodeOrByte25() {
+    doTest(LanguageLevel.PYTHON25, true, true);
+  }
+
+  // ---
  private void doTest(final LanguageLevel languageLevel, final boolean checkWarnings, final boolean checkInfos) {
    PythonLanguageLevelPusher.setForcedLanguageLevel(myFixture.getProject(), languageLevel);
    try {