mirror of
https://gitflic.ru/project/openide/openide.git
synced 2026-01-11 20:07:01 +07:00
Merge-request: IJ-MR-116296 Merged-by: Vladimir Koshelev <Vladimir.Koshelev@jetbrains.com> GitOrigin-RevId: e7559fb3215d757e6273543e4aa27d52df755e63
507 lines
16 KiB
Java
507 lines
16 KiB
Java
// Copyright 2000-2023 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
|
|
package com.jetbrains.python.lexer;
|
|
|
|
import com.intellij.lexer.FlexAdapter;
|
|
import com.intellij.lexer.FlexLexer;
|
|
import com.intellij.lexer.MergingLexerAdapter;
|
|
import com.intellij.psi.tree.IElementType;
|
|
import com.intellij.psi.tree.TokenSet;
|
|
import com.intellij.util.containers.Stack;
|
|
import com.jetbrains.python.PyTokenTypes;
|
|
import com.jetbrains.python.PythonDialectsTokenSetProvider;
|
|
import com.jetbrains.python.psi.PyStringLiteralUtil;
|
|
import it.unimi.dsi.fastutil.ints.IntArrayList;
|
|
import org.jetbrains.annotations.NotNull;
|
|
import org.jetbrains.annotations.Nullable;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
public class PythonIndentingProcessor extends MergingLexerAdapter {
|
|
@SuppressWarnings("SSBasedInspection")
|
|
protected final IntArrayList myIndentStack = new IntArrayList();
|
|
protected int myBraceLevel;
|
|
protected boolean myLineHasSignificantTokens;
|
|
protected int myLastNewLineIndent = -1;
|
|
private int myCurrentNewLineIndent = 0;
|
|
|
|
protected List<PendingToken> myTokenQueue = new ArrayList<>();
|
|
private int myLineBreakBeforeFirstCommentIndex = -1;
|
|
protected boolean myProcessSpecialTokensPending = false;
|
|
|
|
private final Stack<FString> myFStringStack = new Stack<>();
|
|
|
|
private static final boolean DUMP_TOKENS = false;
|
|
private final TokenSet RECOVERY_TOKENS = PythonDialectsTokenSetProvider.getInstance().getUnbalancedBracesRecoveryTokens();
|
|
|
|
/**
 * Wraps the given flex lexer into a {@link FlexAdapter} and hands it, together with {@code tokens},
 * to the {@link MergingLexerAdapter} constructor.
 *
 * @param lexer  generated lexer that produces the base token stream
 * @param tokens token set forwarded unchanged to the superclass
 */
public PythonIndentingProcessor(FlexLexer lexer, TokenSet tokens) {
  super(new FlexAdapter(lexer), tokens);
}
|
|
|
|
protected static class PendingToken {
|
|
private IElementType _type;
|
|
private final int _start;
|
|
private final int _end;
|
|
|
|
public PendingToken(IElementType type, int start, int end) {
|
|
_type = type;
|
|
_start = start;
|
|
_end = end;
|
|
}
|
|
|
|
public IElementType getType() {
|
|
return _type;
|
|
}
|
|
|
|
public int getStart() {
|
|
return _start;
|
|
}
|
|
|
|
public int getEnd() {
|
|
return _end;
|
|
}
|
|
|
|
public void setType(IElementType type) {
|
|
_type = type;
|
|
}
|
|
|
|
@Override
|
|
public String toString() {
|
|
return _type + ":" + _start + "-" + _end;
|
|
}
|
|
}
|
|
|
|
private static class PendingCommentToken extends PendingToken {
|
|
private final int myIndent;
|
|
|
|
PendingCommentToken(IElementType type, int start, int end, int indent) {
|
|
super(type, start, end);
|
|
myIndent = indent;
|
|
}
|
|
|
|
public int getIndent() {
|
|
return myIndent;
|
|
}
|
|
}
|
|
|
|
@Nullable
|
|
protected IElementType getBaseTokenType() {
|
|
return super.getTokenType();
|
|
}
|
|
|
|
protected int getBaseTokenStart() {
|
|
return super.getTokenStart();
|
|
}
|
|
|
|
protected int getBaseTokenEnd() {
|
|
return super.getTokenEnd();
|
|
}
|
|
|
|
@NotNull
|
|
protected String getBaseTokenText() {
|
|
return getBufferSequence().subSequence(getBaseTokenStart(), getBaseTokenEnd()).toString();
|
|
}
|
|
|
|
private boolean isBaseAt(IElementType tokenType) {
|
|
return getBaseTokenType() == tokenType;
|
|
}
|
|
|
|
@Override
|
|
public IElementType getTokenType() {
|
|
if (myTokenQueue.size() > 0) {
|
|
return myTokenQueue.get(0).getType();
|
|
}
|
|
return super.getTokenType();
|
|
}
|
|
|
|
@Override
|
|
public int getTokenStart() {
|
|
if (myTokenQueue.size() > 0) {
|
|
return myTokenQueue.get(0).getStart();
|
|
}
|
|
return super.getTokenStart();
|
|
}
|
|
|
|
@Override
|
|
public int getTokenEnd() {
|
|
if (myTokenQueue.size() > 0) {
|
|
return myTokenQueue.get(0).getEnd();
|
|
}
|
|
return super.getTokenEnd();
|
|
}
|
|
|
|
@Override
|
|
public void advance() {
|
|
if (getTokenType() == PyTokenTypes.LINE_BREAK) {
|
|
final String text = getTokenText();
|
|
int spaces = 0;
|
|
for (int i = text.length() - 1; i >= 0; i--) {
|
|
if (text.charAt(i) == ' ') {
|
|
spaces++;
|
|
}
|
|
else if (text.charAt(i) == '\t') {
|
|
spaces += 8;
|
|
}
|
|
}
|
|
myCurrentNewLineIndent = spaces;
|
|
}
|
|
else if (getTokenType() == PyTokenTypes.TAB) {
|
|
myCurrentNewLineIndent += 8;
|
|
}
|
|
if (myTokenQueue.size() > 0) {
|
|
myTokenQueue.remove(0);
|
|
if (myProcessSpecialTokensPending) {
|
|
myProcessSpecialTokensPending = false;
|
|
processSpecialTokens();
|
|
}
|
|
}
|
|
else {
|
|
advanceBase();
|
|
processSpecialTokens();
|
|
}
|
|
adjustBraceLevel();
|
|
if (DUMP_TOKENS) {
|
|
if (getTokenType() != null) {
|
|
System.out.print(getTokenStart() + "-" + getTokenEnd() + ":" + getTokenType());
|
|
if (getTokenType() == PyTokenTypes.LINE_BREAK) {
|
|
System.out.println("{" + myBraceLevel + "}");
|
|
}
|
|
else {
|
|
System.out.print(" ");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
protected void advanceBase() {
|
|
super.advance();
|
|
checkSignificantTokens();
|
|
checkFString();
|
|
}
|
|
|
|
private void checkFString() {
|
|
final String tokenText = getBaseTokenText();
|
|
if (isBaseAt(PyTokenTypes.FSTRING_START)) {
|
|
final int prefixLength = PyStringLiteralUtil.getPrefixLength(tokenText);
|
|
final String openingQuotes = tokenText.substring(prefixLength);
|
|
assert !openingQuotes.isEmpty();
|
|
myFStringStack.push(new FString(openingQuotes, new Stack<>()));
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.FSTRING_END)) {
|
|
while (!myFStringStack.isEmpty()) {
|
|
final FString lastFString = myFStringStack.pop();
|
|
if (lastFString.quotes.equals(tokenText)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.FSTRING_FRAGMENT_START)) {
|
|
assert !myFStringStack.isEmpty();
|
|
myFStringStack.peek().fragments.push(FStringFragmentPart.EXPRESSION);
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.FSTRING_FRAGMENT_END)) {
|
|
assert !myFStringStack.isEmpty();
|
|
FString topmostFString = myFStringStack.peek();
|
|
assert !topmostFString.fragments.isEmpty();
|
|
topmostFString.fragments.pop();
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.FSTRING_FRAGMENT_FORMAT_START) || isBaseAt(PyTokenTypes.FSTRING_FRAGMENT_TYPE_CONVERSION)) {
|
|
assert !myFStringStack.isEmpty();
|
|
FString topmostFString = myFStringStack.peek();
|
|
assert !topmostFString.fragments.isEmpty();
|
|
topmostFString.fragments.pop();
|
|
topmostFString.fragments.push(FStringFragmentPart.TYPE_CONVERSION_OR_FORMAT);
|
|
}
|
|
}
|
|
|
|
protected void pushToken(IElementType type, int start, int end) {
|
|
myTokenQueue.add(new PendingToken(type, start, end));
|
|
}
|
|
|
|
@Override
|
|
public void start(@NotNull CharSequence buffer, int startOffset, int endOffset, int initialState) {
|
|
checkStartState(startOffset, initialState);
|
|
super.start(buffer, startOffset, endOffset, initialState);
|
|
setStartState();
|
|
}
|
|
|
|
protected void checkStartState(int startOffset, int initialState) {
|
|
if (DUMP_TOKENS) {
|
|
System.out.println("\n--- LEXER START---");
|
|
}
|
|
}
|
|
|
|
private void setStartState() {
|
|
myIndentStack.clear();
|
|
myIndentStack.push(0);
|
|
myBraceLevel = 0;
|
|
adjustBraceLevel();
|
|
myLineHasSignificantTokens = false;
|
|
checkSignificantTokens();
|
|
checkFString();
|
|
if (isBaseAt(PyTokenTypes.SPACE)) {
|
|
processIndent(0, PyTokenTypes.SPACE);
|
|
}
|
|
}
|
|
|
|
private void adjustBraceLevel() {
|
|
boolean insideFStringFragment = !myFStringStack.isEmpty() && !myFStringStack.peek().fragments.isEmpty();
|
|
final IElementType tokenType = getTokenType();
|
|
if (PyTokenTypes.OPEN_BRACES.contains(tokenType)) {
|
|
myBraceLevel++;
|
|
}
|
|
else if (PyTokenTypes.CLOSE_BRACES.contains(tokenType)) {
|
|
myBraceLevel--;
|
|
}
|
|
else if ((myBraceLevel != 0 || insideFStringFragment) && RECOVERY_TOKENS.contains(tokenType)) {
|
|
myBraceLevel = 0;
|
|
if (insideFStringFragment) {
|
|
myFStringStack.clear();
|
|
}
|
|
final int pos = getTokenStart();
|
|
pushToken(PyTokenTypes.STATEMENT_BREAK, pos, pos);
|
|
final int indents = myIndentStack.size();
|
|
for (int i = 0; i < indents - 1; i++) {
|
|
final int indent = myIndentStack.topInt();
|
|
if (myCurrentNewLineIndent >= indent) {
|
|
break;
|
|
}
|
|
if (myIndentStack.size() > 1) {
|
|
myIndentStack.pop();
|
|
pushToken(PyTokenTypes.DEDENT, pos, pos);
|
|
}
|
|
}
|
|
pushToken(PyTokenTypes.LINE_BREAK, pos, pos);
|
|
}
|
|
}
|
|
|
|
protected void checkSignificantTokens() {
|
|
IElementType tokenType = getBaseTokenType();
|
|
if (!PyTokenTypes.WHITESPACE_OR_LINEBREAK.contains(tokenType) && tokenType != getCommentTokenType()) {
|
|
myLineHasSignificantTokens = true;
|
|
}
|
|
}
|
|
|
|
protected void processSpecialTokens() {
|
|
int tokenStart = getBaseTokenStart();
|
|
if (isBaseAt(PyTokenTypes.LINE_BREAK)) {
|
|
processLineBreak(tokenStart);
|
|
if (isBaseAt(getCommentTokenType())) {
|
|
myLineBreakBeforeFirstCommentIndex = myTokenQueue.size() - 1;
|
|
while (isBaseAt(getCommentTokenType())) {
|
|
// comment at start of line; maybe we need to generate dedent before the comments
|
|
final int commentEnd = getBaseTokenEnd();
|
|
myTokenQueue.add(new PendingCommentToken(getBaseTokenType(), getBaseTokenStart(), commentEnd, myLastNewLineIndent));
|
|
advanceBase();
|
|
if (isBaseAt(PyTokenTypes.LINE_BREAK)) {
|
|
processLineBreak(getBaseTokenStart());
|
|
}
|
|
// Treat EOF as an indent of size 0
|
|
else if (getBaseTokenType() == null) {
|
|
closeDanglingSuitesWithComments(0, commentEnd);
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
myLineBreakBeforeFirstCommentIndex = -1;
|
|
}
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.BACKSLASH)) {
|
|
processBackslash(tokenStart);
|
|
}
|
|
else if (isBaseAt(PyTokenTypes.SPACE)) {
|
|
processSpace();
|
|
}
|
|
}
|
|
|
|
private void processSpace() {
|
|
int start = getBaseTokenStart();
|
|
int end = getBaseTokenEnd();
|
|
while (getBaseTokenType() == PyTokenTypes.SPACE) {
|
|
end = getBaseTokenEnd();
|
|
advanceBase();
|
|
}
|
|
if (getBaseTokenType() == PyTokenTypes.LINE_BREAK) {
|
|
processLineBreak(start);
|
|
}
|
|
else if (getBaseTokenType() == PyTokenTypes.BACKSLASH) {
|
|
processBackslash(start);
|
|
}
|
|
else {
|
|
myTokenQueue.add(new PendingToken(PyTokenTypes.SPACE, start, end));
|
|
}
|
|
}
|
|
|
|
private void processBackslash(int tokenStart) {
|
|
PendingToken backslashToken = new PendingToken(getBaseTokenType(), tokenStart, getBaseTokenEnd());
|
|
myTokenQueue.add(backslashToken);
|
|
advanceBase();
|
|
while (PyTokenTypes.WHITESPACE.contains(getBaseTokenType())) {
|
|
pushCurrentToken();
|
|
advanceBase();
|
|
}
|
|
if (getBaseTokenType() == PyTokenTypes.LINE_BREAK) {
|
|
backslashToken.setType(PyTokenTypes.SPACE);
|
|
processInsignificantLineBreak(getBaseTokenStart(), true);
|
|
}
|
|
myProcessSpecialTokensPending = true;
|
|
}
|
|
|
|
protected void processLineBreak(int startPos) {
|
|
if (myBraceLevel == 0 && isOutsideFStringOrInsideItsLineBreakSensitiveTextPart()) {
|
|
if (myLineHasSignificantTokens) {
|
|
pushToken(PyTokenTypes.STATEMENT_BREAK, startPos, startPos);
|
|
}
|
|
myLineHasSignificantTokens = false;
|
|
advanceBase();
|
|
processIndent(startPos, PyTokenTypes.LINE_BREAK);
|
|
}
|
|
else {
|
|
processInsignificantLineBreak(startPos, false);
|
|
}
|
|
}
|
|
|
|
private boolean isOutsideFStringOrInsideItsLineBreakSensitiveTextPart() {
|
|
if (myFStringStack.isEmpty()) return true;
|
|
FString topmostFString = myFStringStack.peek();
|
|
// In triple-quoted f-strings one can put line breaks in any plain-text part
|
|
if (topmostFString.quotes.length() != 1) return false;
|
|
return topmostFString.fragments.isEmpty() || topmostFString.fragments.peek() == FStringFragmentPart.TYPE_CONVERSION_OR_FORMAT;
|
|
}
|
|
|
|
protected void processInsignificantLineBreak(int startPos,
|
|
boolean breakStatementOnLineBreak) {
|
|
// merge whitespace following the line break character into the
|
|
// line break token
|
|
int end = getBaseTokenEnd();
|
|
advanceBase();
|
|
while (getBaseTokenType() == PyTokenTypes.SPACE || getBaseTokenType() == PyTokenTypes.TAB ||
|
|
(!breakStatementOnLineBreak && getBaseTokenType() == PyTokenTypes.LINE_BREAK)) {
|
|
end = getBaseTokenEnd();
|
|
advanceBase();
|
|
}
|
|
myTokenQueue.add(new PendingToken(PyTokenTypes.LINE_BREAK, startPos, end));
|
|
myProcessSpecialTokensPending = true;
|
|
}
|
|
|
|
protected void processIndent(int whiteSpaceStart, IElementType whitespaceTokenType) {
|
|
int lastIndent = myIndentStack.topInt();
|
|
int indent = getNextLineIndent();
|
|
myLastNewLineIndent = indent;
|
|
// don't generate indent/dedent tokens if a line contains only end-of-line comment and whitespace
|
|
if (getBaseTokenType() == getCommentTokenType()) {
|
|
indent = lastIndent;
|
|
}
|
|
int whiteSpaceEnd = (getBaseTokenType() == null) ? super.getBufferEnd() : getBaseTokenStart();
|
|
if (indent > lastIndent) {
|
|
myIndentStack.push(indent);
|
|
myTokenQueue.add(new PendingToken(whitespaceTokenType, whiteSpaceStart, whiteSpaceEnd));
|
|
int insertIndex = skipPrecedingCommentsWithIndent(indent, myTokenQueue.size() - 1);
|
|
int indentOffset = insertIndex == myTokenQueue.size() ? whiteSpaceEnd : myTokenQueue.get(insertIndex).getStart();
|
|
myTokenQueue.add(insertIndex, new PendingToken(PyTokenTypes.INDENT, indentOffset, indentOffset));
|
|
}
|
|
else if (indent < lastIndent) {
|
|
closeDanglingSuitesWithComments(indent, whiteSpaceStart);
|
|
myTokenQueue.add(new PendingToken(whitespaceTokenType, whiteSpaceStart, whiteSpaceEnd));
|
|
}
|
|
else {
|
|
myTokenQueue.add(new PendingToken(whitespaceTokenType, whiteSpaceStart, whiteSpaceEnd));
|
|
}
|
|
}
|
|
|
|
private void closeDanglingSuitesWithComments(int indent, int whiteSpaceStart) {
|
|
int lastIndent = myIndentStack.topInt();
|
|
|
|
int insertIndex = myLineBreakBeforeFirstCommentIndex == -1 ? myTokenQueue.size() : myLineBreakBeforeFirstCommentIndex;
|
|
int lastSuiteIndent;
|
|
while (indent < lastIndent) {
|
|
lastSuiteIndent = myIndentStack.popInt();
|
|
lastIndent = myIndentStack.topInt();
|
|
int dedentOffset = whiteSpaceStart;
|
|
if (indent > lastIndent) {
|
|
myTokenQueue.add(new PendingToken(PyTokenTypes.INCONSISTENT_DEDENT, whiteSpaceStart, whiteSpaceStart));
|
|
insertIndex = myTokenQueue.size();
|
|
}
|
|
else {
|
|
insertIndex = skipPrecedingCommentsWithSameIndentOnSuiteClose(lastSuiteIndent, insertIndex);
|
|
}
|
|
if (insertIndex != myTokenQueue.size()) {
|
|
dedentOffset = myTokenQueue.get(insertIndex).getStart();
|
|
}
|
|
myTokenQueue.add(insertIndex, new PendingToken(PyTokenTypes.DEDENT, dedentOffset, dedentOffset));
|
|
insertIndex++;
|
|
}
|
|
}
|
|
|
|
protected int skipPrecedingCommentsWithIndent(int indent, int index) {
|
|
// insert the DEDENT before previous comments that have the same indent as the current token indent
|
|
boolean foundComment = false;
|
|
while(index > 0 && myTokenQueue.get(index - 1) instanceof PendingCommentToken commentToken) {
|
|
if (commentToken.getIndent() != indent) {
|
|
break;
|
|
}
|
|
foundComment = true;
|
|
index--;
|
|
if (index > 1 &&
|
|
myTokenQueue.get(index - 1).getType() == PyTokenTypes.LINE_BREAK &&
|
|
myTokenQueue.get(index - 2) instanceof PendingCommentToken) {
|
|
index--;
|
|
}
|
|
}
|
|
return foundComment ? index : myTokenQueue.size();
|
|
}
|
|
|
|
protected int skipPrecedingCommentsWithSameIndentOnSuiteClose(int indent, int anchorIndex) {
|
|
int result = anchorIndex;
|
|
for (int i = anchorIndex; i < myTokenQueue.size(); i++) {
|
|
final PendingToken token = myTokenQueue.get(i);
|
|
if (token instanceof PendingCommentToken) {
|
|
if (((PendingCommentToken)token).getIndent() < indent) {
|
|
break;
|
|
}
|
|
result = i + 1;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
protected int getNextLineIndent() {
|
|
int indent = 0;
|
|
while (getBaseTokenType() != null && PyTokenTypes.WHITESPACE_OR_LINEBREAK.contains(getBaseTokenType())) {
|
|
if (getBaseTokenType() == PyTokenTypes.TAB) {
|
|
indent = ((indent / 8) + 1) * 8;
|
|
}
|
|
else if (getBaseTokenType() == PyTokenTypes.SPACE) {
|
|
indent++;
|
|
}
|
|
else if (getBaseTokenType() == PyTokenTypes.LINE_BREAK) {
|
|
indent = 0;
|
|
}
|
|
advanceBase();
|
|
}
|
|
if (getBaseTokenType() == null) {
|
|
return 0;
|
|
}
|
|
return indent;
|
|
}
|
|
|
|
private void pushCurrentToken() {
|
|
myTokenQueue.add(new PendingToken(getBaseTokenType(), getBaseTokenStart(), getBaseTokenEnd()));
|
|
}
|
|
|
|
|
|
protected IElementType getCommentTokenType() {
|
|
return PyTokenTypes.END_OF_LINE_COMMENT;
|
|
}
|
|
|
|
private record FString(@NotNull String quotes, @NotNull Stack<FStringFragmentPart> fragments) {
|
|
}
|
|
|
|
private enum FStringFragmentPart {
|
|
EXPRESSION,
|
|
TYPE_CONVERSION_OR_FORMAT,
|
|
}
|
|
|
|
}
|