[Spellchecker] Update Grazie platform dependencies

GitOrigin-RevId: 1f20d38ddc84ee50fe813b6d6c5e8dd6bb8df2fc
This commit is contained in:
Ivan Posti
2022-05-05 16:37:49 +03:00
committed by intellij-monorepo-bot
parent 9c9c165223
commit b7fbbb9858
5 changed files with 165 additions and 55 deletions

View File

@@ -22,50 +22,142 @@
<orderEntry type="library" name="Guava" level="project" />
<orderEntry type="module" module-name="intellij.platform.statistics" />
<orderEntry type="module-library">
<library name="ai.grazie.utils:utils-common" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.utils:utils-common:0.1.50-1" />
<library name="ai.grazie.utils.common" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.utils:utils-common-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/utils/utils-common/0.1.50-1/utils-common-0.1.50-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/utils/utils-common-jvm/0.2.27/utils-common-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/utils/utils-common/0.1.50-1/utils-common-0.1.50-1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/utils/utils-common-jvm/0.2.27/utils-common-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp:nlp-common" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-common:0.1.50-1" />
<library name="ai.grazie.nlp.common" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-common-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-common/0.1.50-1/nlp-common-0.1.50-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-common-jvm/0.2.27/nlp-common-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-common/0.1.50-1/nlp-common-0.1.50-1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-common-jvm/0.2.27/nlp-common-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp:nlp-tokenizer" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-tokenizer:0.1.50-1" />
<library name="ai.grazie.nlp-tokenizer" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-tokenizer-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-tokenizer/0.1.50-1/nlp-tokenizer-0.1.50-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-tokenizer-jvm/0.2.27/nlp-tokenizer-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.spell.gec.local.engine" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.spell:gec-spell-local-engine-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/gec-spell-local-engine-jvm/0.2.27/gec-spell-local-engine-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-tokenizer/0.1.50-1/nlp-tokenizer-0.1.50-1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/gec-spell-local-engine-jvm/0.2.27/gec-spell-local-engine-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.spell:hunspell-en" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.spell:hunspell-en:0.1.7-1" />
<library name="ai.grazie.nlp.langs" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-langs-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/hunspell-en/0.1.7-1/hunspell-en-0.1.7-1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-langs-jvm/0.2.27/nlp-langs-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/hunspell-en/0.1.7-1/hunspell-en-0.1.7-1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-langs-jvm/0.2.27/nlp-langs-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp.similarity" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-similarity-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-similarity-jvm/0.2.27/nlp-similarity-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-similarity-jvm/0.2.27/nlp-similarity-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp.patterns" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-patterns-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-patterns-jvm/0.2.27/nlp-patterns-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-patterns-jvm/0.2.27/nlp-patterns-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp.phonetics" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-phonetics-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-phonetics-jvm/0.2.27/nlp-phonetics-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.spell.hunspell.en" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.spell:hunspell-en-jvm:0.2.11" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/hunspell-en-jvm/0.2.11/hunspell-en-jvm-0.2.11.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/hunspell-en-jvm/0.2.11/hunspell-en-jvm-0.2.11-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.nlp.stemmer" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.nlp:nlp-stemmer-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-stemmer-jvm/0.2.27/nlp-stemmer-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/nlp/nlp-stemmer-jvm/0.2.27/nlp-stemmer-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.model.gec" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.model:model-gec-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/model/model-gec-jvm/0.2.27/model-gec-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/model/model-gec-jvm/0.2.27/model-gec-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="ai.grazie.model.text" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.model:model-text-jvm:0.2.27" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/model/model-text-jvm/0.2.27/model-text-jvm-0.2.27.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/model/model-text-jvm/0.2.27/model-text-jvm-0.2.27-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
@@ -83,19 +175,8 @@
</orderEntry>
<orderEntry type="library" name="lucene-core" level="project" />
<orderEntry type="library" name="lucene-analyzers-common" level="project" />
<orderEntry type="module-library">
<library name="ai.grazie.spell:gec-spell-local-engine" type="repository">
<properties include-transitive-deps="false" maven-id="ai.grazie.spell:gec-spell-local-engine:0.1.50-1" />
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/gec-spell-local-engine/0.1.50-1/gec-spell-local-engine-0.1.50-1.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/ai/grazie/spell/gec-spell-local-engine/0.1.50-1/gec-spell-local-engine-0.1.50-1-sources.jar!/" />
</SOURCES>
</library>
</orderEntry>
<orderEntry type="module" module-name="intellij.platform.core.ui" />
<orderEntry type="module" module-name="intellij.platform.util.jdom" />
<orderEntry type="library" name="kotlinx-coroutines-jdk8" level="project" />
</component>
</module>

View File

@@ -1,10 +1,18 @@
// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.spellchecker.grazie
import ai.grazie.nlp.langs.alphabet.Alphabet
import ai.grazie.nlp.phonetics.metaphone.DoubleMetaphone
import ai.grazie.spell.GrazieSpeller
import ai.grazie.spell.GrazieSplittingSpeller
import ai.grazie.spell.language.English
import ai.grazie.spell.dictionary.RuleDictionary
import ai.grazie.spell.dictionary.rule.IgnoreRuleDictionary
import ai.grazie.spell.lists.hunspell.HunspellWordList
import ai.grazie.spell.suggestion.filter.feature.RadiusSuggestionFilter
import ai.grazie.spell.suggestion.ranker.*
import ai.grazie.spell.utils.DictionaryResources
import ai.grazie.utils.mpp.FromResourcesDataLoader
import ai.grazie.utils.mpp.Resources
import com.intellij.openapi.progress.ProgressManager
import com.intellij.openapi.project.Project
import com.intellij.openapi.util.io.FileUtil
@@ -18,7 +26,7 @@ import com.intellij.spellchecker.grazie.async.WordListLoader
import com.intellij.spellchecker.grazie.dictionary.ExtendedWordListWithFrequency
import com.intellij.spellchecker.grazie.dictionary.WordListAdapter
import com.intellij.util.containers.SLRUCache
import org.apache.lucene.analysis.hunspell.TimeoutPolicy
import kotlinx.coroutines.runBlocking
internal class GrazieSpellCheckerEngine(project: Project) : SpellCheckerEngine {
override fun getTransformation(): Transformation = Transformation()
@@ -29,20 +37,43 @@ internal class GrazieSpellCheckerEngine(project: Project) : SpellCheckerEngine {
private val mySpeller: GrazieAsyncSpeller = GrazieAsyncSpeller(project) {
GrazieSplittingSpeller(
GrazieSpeller(
GrazieSpeller.UserConfig(
GrazieSpeller.UserConfig.Dictionary(
dictionary = ExtendedWordListWithFrequency(
DictionaryResources.getHunspellDict("/dictionary/en", TimeoutPolicy.NO_TIMEOUT) { ProgressManager.checkCanceled() },
adapter),
isAlien = { word -> English.isAlien(word) && adapter.isAlien(word) }
)
)
),
GrazieSpeller(createSpellerConfig()),
GrazieSplittingSpeller.UserConfig()
)
}
/**
 * Assembles the [GrazieSpeller.UserConfig] for the English speller.
 *
 * The configuration has two parts:
 *  - a dictionary made of the `/dictionary/en` Hunspell word list (`.aff` + `.dic`) combined with
 *    [adapter], the ignore rules and the `/rule/en` replacing rules;
 *  - a suggestion model: a radius filter followed by a linear combination of four rankers.
 *
 * Building the Hunspell word list polls [ProgressManager.checkCanceled] through the
 * `checkCanceled` callback, and the replacing rules are loaded inside [runBlocking],
 * so this call blocks the calling thread until they are available.
 */
private fun createSpellerConfig(): GrazieSpeller.UserConfig {
  val resourcePrefix = "/dictionary/en"

  // Bundled Hunspell dictionary, extended with the words known to `adapter`.
  val hunspell = HunspellWordList.create(
    Resources.text("$resourcePrefix.aff"),
    Resources.text("$resourcePrefix.dic"),
    checkCanceled = { ProgressManager.checkCanceled() }
  )
  val words = ExtendedWordListWithFrequency(hunspell, adapter)

  // Rule dictionaries are created in the same order in which they are aggregated.
  val ignoreRules = IgnoreRuleDictionary.standard(tooShortLength = 2)
  val replacingRules = runBlocking {
    DictionaryResources.getReplacingRules("/rule/en", FromResourcesDataLoader)
  }

  val dictionaryConfig = GrazieSpeller.UserConfig.Dictionary(
    dictionary = words,
    rules = RuleDictionary.Aggregated(ignoreRules, replacingRules),
    // A word is alien only when it has no match in the English alphabet
    // and `adapter` reports it as alien too.
    isAlien = { word -> !Alphabet.ENGLISH.matchAny(word) && adapter.isAlien(word) }
  )

  // Weighted mix of string-similarity, phonetic and frequency-based rankers.
  val suggestionRanker = LinearAggregatingSuggestionRanker(
    JaroWinklerSuggestionRanker() to 0.43,
    LevenshteinSuggestionRanker() to 0.20,
    PhoneticSuggestionRanker(DoubleMetaphone()) to 0.11,
    FrequencySuggestionRanker(words) to 0.23
  )
  val modelConfig = GrazieSpeller.UserConfig.Model(
    filter = RadiusSuggestionFilter(0.05),
    ranker = suggestionRanker
  )

  return GrazieSpeller.UserConfig(dictionaryConfig, model = modelConfig)
}
private data class SuggestionsRequest(val word: String, val maxSuggestions: Int)
private val suggestionsCache = SLRUCache.create<SuggestionsRequest, List<String>>(1024, 1024) { request ->
mySpeller.suggest(request.word, request.maxSuggestions).take(request.maxSuggestions)

View File

@@ -1,10 +1,9 @@
// Copyright 2000-2021 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.spellchecker.grazie.dictionary
import ai.grazie.nlp.similarity.Levenshtein
import ai.grazie.spell.lists.WordList
import ai.grazie.spell.utils.Distances
import com.intellij.util.containers.CollectionFactory
import kotlin.collections.LinkedHashSet
class SimpleWordList(private val container: Set<String>) : WordList {
companion object {
@@ -18,6 +17,6 @@ class SimpleWordList(private val container: Set<String>) : WordList {
}
override fun suggest(word: String) = container.filterTo(LinkedHashSet()) {
Distances.levenshtein.distance(it, word, MAX_LEVENSHTEIN_DISTANCE + 1) <= MAX_LEVENSHTEIN_DISTANCE
Levenshtein.distance(it, word, MAX_LEVENSHTEIN_DISTANCE + 1) <= MAX_LEVENSHTEIN_DISTANCE
}
}

View File

@@ -1,8 +1,8 @@
// Copyright 2000-2020 JetBrains s.r.o. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.spellchecker.grazie.dictionary
import ai.grazie.nlp.similarity.Levenshtein
import ai.grazie.spell.lists.WordList
import ai.grazie.spell.utils.Distances
internal class WordListAdapter : WordList, EditableWordListAdapter() {
fun isAlien(word: String): Boolean {
@@ -26,7 +26,7 @@ internal class WordListAdapter : WordList, EditableWordListAdapter() {
val result = LinkedHashSet<String>()
for (dictionary in dictionaries.values) {
dictionary.consumeSuggestions(word) {
val distance = Distances.levenshtein.distance(word, it, SimpleWordList.MAX_LEVENSHTEIN_DISTANCE + 1)
val distance = Levenshtein.distance(word, it, SimpleWordList.MAX_LEVENSHTEIN_DISTANCE + 1)
if (distance <= SimpleWordList.MAX_LEVENSHTEIN_DISTANCE) {
result.add(it)
}

View File

@@ -1,4 +1,4 @@
// Copyright 2000-2021 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license that can be found in the LICENSE file.
// Copyright 2000-2022 JetBrains s.r.o. and contributors. Use of this source code is governed by the Apache 2.0 license.
package com.intellij.spellchecker.hunspell
import ai.grazie.spell.lists.hunspell.HunspellWordList
@@ -7,7 +7,6 @@ import com.intellij.openapi.util.io.FileUtilRt
import com.intellij.openapi.vfs.VfsUtil.findFileByIoFile
import com.intellij.spellchecker.dictionary.Dictionary
import com.intellij.util.Consumer
import org.apache.lucene.analysis.hunspell.TimeoutPolicy
import java.io.File
import java.io.FileNotFoundException
import java.io.InputStreamReader
@@ -40,11 +39,11 @@ class HunspellDictionary(path: String, name: String? = null) : Dictionary {
val bundle = loadHunspellBundle(path)
if (bundle !== null) {
bundle.dic.inputStream().use { dic ->
bundle.aff.inputStream().use { aff ->
this.dict = HunspellWordList(aff, dic, TimeoutPolicy.NO_TIMEOUT) { ProgressManager.checkCanceled() }
}
}
this.dict = HunspellWordList(
bundle.aff.path,
bundle.dic.path,
checkCanceled = { ProgressManager.checkCanceled() }
)
val file = findFileByIoFile(bundle.dic, true)!!
InputStreamReader(file.inputStream, file.charset).use { reader ->