[java-lexer] IDEA-356870 support '\u001a' at the end of the file

GitOrigin-RevId: 6c1ee14c81f071f1c24a435a84370fbf0dc94a73
This commit is contained in:
Mikhail Pyltsin
2025-05-09 14:48:15 +02:00
committed by intellij-monorepo-bot
parent b1cfbfa235
commit 599f689ba4
2 changed files with 37 additions and 4 deletions

View File

@@ -94,6 +94,16 @@ class JavaLexer(level: LanguageLevel) : Lexer {
val c = locateCharAt(myBufferIndex)
when (c) {
// '\u001a' counts as whitespace only when it is the last character of the file; anywhere else it falls through to flexLocateToken()
'\u001a' -> {
if (myBufferIndex + mySymbolLength == myBufferEndOffset) {
myTokenType = WHITE_SPACE
myTokenEndOffset = myBufferIndex + mySymbolLength
}
else {
flexLocateToken()
}
}
' ', '\t', '\n', '\r', '\u000C' -> {
myTokenType = WHITE_SPACE
myTokenEndOffset = getWhitespaces(myBufferIndex + mySymbolLength)
@@ -209,17 +219,20 @@ class JavaLexer(level: LanguageLevel) : Lexer {
}
private fun getWhitespaces(offset: Int): Int {
return getChars(offset, " \t\n\r\u000c")
return getChars(offset, " \t\n\r\u000c", '\u001a')
}
private fun getSimpleWhitespaces(offset: Int): Int {
return getChars(offset, " \t")
return getChars(offset, " \t", null)
}
/**
* @param offset the offset to start.
* @param charsToDetect the chars to detect.
* @param endChar a char that is accepted only as the very last char of the buffer (end of file), or `null` if there is no such char.
* @return the new position, i.e. the first position at which none of the chars was detected
*/
private fun getChars(offset: Int, charsToDetect: CharSequence): Int {
private fun getChars(offset: Int, charsToDetect: CharSequence, endChar: Char?): Int {
var pos = offset
while (pos < myBufferEndOffset) {
var detected = false
@@ -231,7 +244,12 @@ class JavaLexer(level: LanguageLevel) : Lexer {
break
}
}
if (!detected && endChar != null && pos + mySymbolLength == myBufferEndOffset) {
if (endChar == c) {
pos += mySymbolLength
break
}
}
if (!detected) break
}