From b7fbbb9858d1a956d7b87fcc7f3ed9007d2653d8 Mon Sep 17 00:00:00 2001 From: Ivan Posti Date: Thu, 5 May 2022 16:37:49 +0300 Subject: [PATCH] [Spellchecker] Update Grazie platform dependencies GitOrigin-RevId: 1f20d38ddc84ee50fe813b6d6c5e8dd6bb8df2fc --- spellchecker/intellij.spellchecker.iml | 137 ++++++++++++++---- .../grazie/GrazieSpellCheckerEngine.kt | 57 ++++++-- .../grazie/dictionary/SimpleWordList.kt | 7 +- .../grazie/dictionary/WordListAdapter.kt | 6 +- .../hunspell/HunspellDictionary.kt | 13 +- 5 files changed, 165 insertions(+), 55 deletions(-) diff --git a/spellchecker/intellij.spellchecker.iml b/spellchecker/intellij.spellchecker.iml index 8bc3748d2dd3..05bfa94d66d4 100644 --- a/spellchecker/intellij.spellchecker.iml +++ b/spellchecker/intellij.spellchecker.iml @@ -22,50 +22,142 @@ - - + + - + - + - - + + - + - + - - + + - + + + + + + + + + + + - + - - + + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -83,19 +175,8 @@ - - - - - - - - - - - - + \ No newline at end of file diff --git a/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt b/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt index 19e5d84109d7..50985ed2d131 100644 --- a/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt +++ b/spellchecker/src/com/intellij/spellchecker/grazie/GrazieSpellCheckerEngine.kt @@ -1,10 +1,18 @@ -// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package com.intellij.spellchecker.grazie +import ai.grazie.nlp.langs.alphabet.Alphabet +import ai.grazie.nlp.phonetics.metaphone.DoubleMetaphone import ai.grazie.spell.GrazieSpeller import ai.grazie.spell.GrazieSplittingSpeller -import ai.grazie.spell.language.English +import ai.grazie.spell.dictionary.RuleDictionary +import ai.grazie.spell.dictionary.rule.IgnoreRuleDictionary +import ai.grazie.spell.lists.hunspell.HunspellWordList +import ai.grazie.spell.suggestion.filter.feature.RadiusSuggestionFilter +import ai.grazie.spell.suggestion.ranker.* import ai.grazie.spell.utils.DictionaryResources +import ai.grazie.utils.mpp.FromResourcesDataLoader +import ai.grazie.utils.mpp.Resources import com.intellij.openapi.progress.ProgressManager import com.intellij.openapi.project.Project import com.intellij.openapi.util.io.FileUtil @@ -18,7 +26,7 @@ import com.intellij.spellchecker.grazie.async.WordListLoader import com.intellij.spellchecker.grazie.dictionary.ExtendedWordListWithFrequency import com.intellij.spellchecker.grazie.dictionary.WordListAdapter import com.intellij.util.containers.SLRUCache -import org.apache.lucene.analysis.hunspell.TimeoutPolicy +import kotlinx.coroutines.runBlocking internal class GrazieSpellCheckerEngine(project: Project) : SpellCheckerEngine { override fun getTransformation(): Transformation = Transformation() @@ -29,20 +37,43 @@ internal class GrazieSpellCheckerEngine(project: Project) : SpellCheckerEngine { private val mySpeller: GrazieAsyncSpeller = GrazieAsyncSpeller(project) { GrazieSplittingSpeller( - GrazieSpeller( - GrazieSpeller.UserConfig( - GrazieSpeller.UserConfig.Dictionary( - dictionary = ExtendedWordListWithFrequency( - DictionaryResources.getHunspellDict("/dictionary/en", TimeoutPolicy.NO_TIMEOUT) { ProgressManager.checkCanceled() }, - adapter), - isAlien = { word -> English.isAlien(word) && adapter.isAlien(word) } - ) - ) - ), + GrazieSpeller(createSpellerConfig()), GrazieSplittingSpeller.UserConfig() ) } + private fun createSpellerConfig(): GrazieSpeller.UserConfig { + val path = "/dictionary/en" + val wordList = ExtendedWordListWithFrequency( + HunspellWordList.create( + Resources.text("$path.aff"), + Resources.text("$path.dic"), + checkCanceled = { ProgressManager.checkCanceled() } + ), + adapter + ) + val dictionary = GrazieSpeller.UserConfig.Dictionary( + dictionary = wordList, + rules = RuleDictionary.Aggregated( + IgnoreRuleDictionary.standard(tooShortLength = 2), + runBlocking { DictionaryResources.getReplacingRules("/rule/en", FromResourcesDataLoader) } + ), + isAlien = { !Alphabet.ENGLISH.matchAny(it) && adapter.isAlien(it) } + ) + return GrazieSpeller.UserConfig( + dictionary, + model = GrazieSpeller.UserConfig.Model( + filter = RadiusSuggestionFilter(0.05), + ranker = LinearAggregatingSuggestionRanker( + JaroWinklerSuggestionRanker() to 0.43, + LevenshteinSuggestionRanker() to 0.20, + PhoneticSuggestionRanker(DoubleMetaphone()) to 0.11, + FrequencySuggestionRanker(wordList) to 0.23 + ) + ) + ) + } + private data class SuggestionsRequest(val word: String, val maxSuggestions: Int) private val suggestionsCache = SLRUCache.create>(1024, 1024) { request -> mySpeller.suggest(request.word, request.maxSuggestions).take(request.maxSuggestions) diff --git a/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/SimpleWordList.kt b/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/SimpleWordList.kt index c53f21f827ab..50fa05d726af 100644 --- a/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/SimpleWordList.kt +++ b/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/SimpleWordList.kt @@ -1,10 +1,9 @@ -// Copyright 2000-2021 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package com.intellij.spellchecker.grazie.dictionary +import ai.grazie.nlp.similarity.Levenshtein import ai.grazie.spell.lists.WordList -import ai.grazie.spell.utils.Distances import com.intellij.util.containers.CollectionFactory -import kotlin.collections.LinkedHashSet class SimpleWordList(private val container: Set) : WordList { companion object { @@ -18,6 +17,6 @@ class SimpleWordList(private val container: Set) : WordList { } override fun suggest(word: String) = container.filterTo(LinkedHashSet()) { - Distances.levenshtein.distance(it, word, MAX_LEVENSHTEIN_DISTANCE + 1) <= MAX_LEVENSHTEIN_DISTANCE + Levenshtein.distance(it, word, MAX_LEVENSHTEIN_DISTANCE + 1) <= MAX_LEVENSHTEIN_DISTANCE } } diff --git a/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/WordListAdapter.kt b/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/WordListAdapter.kt index f2690e4e97e3..fbc361ba4a67 100644 --- a/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/WordListAdapter.kt +++ b/spellchecker/src/com/intellij/spellchecker/grazie/dictionary/WordListAdapter.kt @@ -1,8 +1,8 @@ -// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package com.intellij.spellchecker.grazie.dictionary +import ai.grazie.nlp.similarity.Levenshtein import ai.grazie.spell.lists.WordList -import ai.grazie.spell.utils.Distances internal class WordListAdapter : WordList, EditableWordListAdapter() { fun isAlien(word: String): Boolean { @@ -26,7 +26,7 @@ internal class WordListAdapter : WordList, EditableWordListAdapter() { val result = LinkedHashSet() for (dictionary in dictionaries.values) { dictionary.consumeSuggestions(word) { - val distance = Distances.levenshtein.distance(word, it, SimpleWordList.MAX_LEVENSHTEIN_DISTANCE + 1) + val distance = Levenshtein.distance(word, it, SimpleWordList.MAX_LEVENSHTEIN_DISTANCE + 1) if (distance <= SimpleWordList.MAX_LEVENSHTEIN_DISTANCE) { result.add(it) } diff --git a/spellchecker/src/com/intellij/spellchecker/hunspell/HunspellDictionary.kt b/spellchecker/src/com/intellij/spellchecker/hunspell/HunspellDictionary.kt index 05ede1776694..91b5ab527bbb 100644 --- a/spellchecker/src/com/intellij/spellchecker/hunspell/HunspellDictionary.kt +++ b/spellchecker/src/com/intellij/spellchecker/hunspell/HunspellDictionary.kt @@ -1,4 +1,4 @@ -// Copyright 2000-2021 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file. +// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license. package com.intellij.spellchecker.hunspell import ai.grazie.spell.lists.hunspell.HunspellWordList @@ -7,7 +7,6 @@ import com.intellij.openapi.util.io.FileUtilRt import com.intellij.openapi.vfs.VfsUtil.findFileByIoFile import com.intellij.spellchecker.dictionary.Dictionary import com.intellij.util.Consumer -import org.apache.lucene.analysis.hunspell.TimeoutPolicy import java.io.File import java.io.FileNotFoundException import java.io.InputStreamReader @@ -40,11 +39,11 @@ class HunspellDictionary(path: String, name: String? = null) : Dictionary { val bundle = loadHunspellBundle(path) if (bundle !== null) { - bundle.dic.inputStream().use { dic -> - bundle.aff.inputStream().use { aff -> - this.dict = HunspellWordList(aff, dic, TimeoutPolicy.NO_TIMEOUT) { ProgressManager.checkCanceled() } - } - } + this.dict = HunspellWordList( + bundle.aff.path, + bundle.dic.path, + checkCanceled = { ProgressManager.checkCanceled() } + ) val file = findFileByIoFile(bundle.dic, true)!! InputStreamReader(file.inputStream, file.charset).use { reader ->