mirror of
https://gitflic.ru/project/openide/openide.git
synced 2026-01-08 15:09:39 +07:00
[aia] LLM-17290 add external api recall calculation, adapt chat code generation feature to a new report type
GitOrigin-RevId: 96f0f2fa5994a24b61d30a0824f447e35417d121
This commit is contained in:
committed by
intellij-monorepo-bot
parent
e8e0390d34
commit
e2cc44b1ca
@@ -2,6 +2,8 @@ package com.intellij.cce.evaluation.data
|
||||
|
||||
import com.intellij.cce.evaluable.*
|
||||
import com.intellij.cce.metric.*
|
||||
import com.intellij.cce.metric.ExternalApiRecall.Companion.AIA_GROUND_TRUTH_EXTERNAL_API_CALLS
|
||||
import com.intellij.cce.metric.ExternalApiRecall.Companion.AIA_PREDICTED_EXTERNAL_API_CALLS
|
||||
import com.intellij.cce.metric.context.MeanContextLines
|
||||
import com.intellij.cce.metric.context.MeanContextSize
|
||||
|
||||
@@ -194,6 +196,16 @@ object Analysis {
|
||||
),
|
||||
)
|
||||
|
||||
/**
 * Evaluation data item holding the ground-truth external API calls as a list of strings.
 *
 * Placed via [DataPlacement.AdditionalConcatenatedLines] under the
 * [AIA_GROUND_TRUTH_EXTERNAL_API_CALLS] key and presented in the
 * [PresentationCategory.ANALYSIS] category with the [DataRenderer.Lines] renderer.
 */
val GROUND_TRUTH_EXTERNAL_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
  name = "Ground truth external API calls",
  description = "Bind with the list of initial external API calls",
  DataPlacement.AdditionalConcatenatedLines(AIA_GROUND_TRUTH_EXTERNAL_API_CALLS),
  presentation = EvalDataPresentation(PresentationCategory.ANALYSIS, renderer = DataRenderer.Lines)
)
|
||||
|
||||
val PREDICTED_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
|
||||
name = "Predicted internal API calls",
|
||||
description = "Bind with the list of predicted internal API calls",
|
||||
@@ -204,6 +216,16 @@ object Analysis {
|
||||
),
|
||||
)
|
||||
|
||||
/**
 * Evaluation data item holding the predicted external API calls as a list of strings.
 *
 * Placed via [DataPlacement.AdditionalConcatenatedLines] under the
 * [AIA_PREDICTED_EXTERNAL_API_CALLS] key and presented in the
 * [PresentationCategory.ANALYSIS] category with the [DataRenderer.Lines] renderer.
 */
val PREDICTED_EXTERNAL_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
  name = "Predicted external API calls",
  description = "Bind with the list of predicted external API calls",
  DataPlacement.AdditionalConcatenatedLines(AIA_PREDICTED_EXTERNAL_API_CALLS),
  presentation = EvalDataPresentation(PresentationCategory.ANALYSIS, renderer = DataRenderer.Lines)
)
|
||||
|
||||
val FAILED_FILE_VALIDATIONS: TrivialEvalData<List<String>> = EvalDataDescription(
|
||||
name = "Failed file validations",
|
||||
description = "Bind with failed file validations",
|
||||
@@ -330,14 +352,23 @@ object Metrics {
|
||||
dependencies = MetricDependencies(Analysis.ERASED_APIS)
|
||||
) { PreservedApi() }
|
||||
|
||||
val API_RECALL: EvalMetric = EvalMetric(
|
||||
val INTERNAL_API_RECALL: EvalMetric = EvalMetric(
|
||||
threshold = 1.0,
|
||||
dependencies = MetricDependencies(
|
||||
Analysis.GROUND_TRUTH_API_CALLS,
|
||||
Analysis.PREDICTED_API_CALLS,
|
||||
DataRenderer.TextDiff
|
||||
) { initial, result -> TextUpdate(initial.sorted().joinToString("\n"), result.sorted().joinToString("\n")) }
|
||||
) { ApiRecall() }
|
||||
) { InternalApiRecall() }
|
||||
|
||||
/**
 * Metric definition for external API recall, with a threshold of 1.0.
 *
 * Depends on [Analysis.GROUND_TRUTH_EXTERNAL_API_CALLS] and
 * [Analysis.PREDICTED_EXTERNAL_API_CALLS]. For the [DataRenderer.TextDiff] renderer both lists
 * are sorted and joined one entry per line before being wrapped in a [TextUpdate].
 * The metric instance itself is created by [ExternalApiRecall].
 */
val EXTERNAL_API_RECALL: EvalMetric = EvalMetric(
  threshold = 1.0,
  dependencies = MetricDependencies(
    Analysis.GROUND_TRUTH_EXTERNAL_API_CALLS,
    Analysis.PREDICTED_EXTERNAL_API_CALLS,
    DataRenderer.TextDiff
  ) { groundTruth, predicted ->
    // Sort both sides so the diff does not depend on the order the calls were collected in.
    val groundTruthText = groundTruth.sorted().joinToString(separator = "\n")
    val predictedText = predicted.sorted().joinToString(separator = "\n")
    TextUpdate(groundTruthText, predictedText)
  }
) { ExternalApiRecall() }
|
||||
|
||||
val FILE_VALIDATIONS_SUCCESS: EvalMetric = EvalMetric(
|
||||
threshold = 1.0,
|
||||
|
||||
@@ -6,22 +6,25 @@ import com.intellij.cce.evaluable.AIA_GROUND_TRUTH_INTERNAL_API_CALLS
|
||||
import com.intellij.cce.evaluable.AIA_PREDICTED_API_CALLS
|
||||
import com.intellij.cce.metric.util.Sample
|
||||
|
||||
class ApiRecall : ConfidenceIntervalMetric<Double>() {
|
||||
override val name: String = "API Recall"
|
||||
override val description: String = "The fraction of correctly guessed project-defined API calls"
|
||||
override val showByDefault: Boolean = true
|
||||
override val valueType = MetricValueType.DOUBLE
|
||||
override val value: Double
|
||||
abstract class ApiRecall : ConfidenceIntervalMetric<Double>() {
|
||||
final override val showByDefault: Boolean = true
|
||||
final override val valueType = MetricValueType.DOUBLE
|
||||
final override val value: Double
|
||||
get() = compute(sample)
|
||||
|
||||
override val supportsIndividualScores: Boolean = true
|
||||
|
||||
abstract fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String>
|
||||
abstract fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String>
|
||||
|
||||
@Suppress("UNCHECKED_CAST")
|
||||
override fun evaluate(sessions: List<Session>): Number {
|
||||
final override fun evaluate(sessions: List<Session>): Number {
|
||||
val fileSample = Sample()
|
||||
sessions
|
||||
.flatMap { it.lookups }
|
||||
.forEach {
|
||||
val predictedApiCalls = it.additionalList(AIA_PREDICTED_API_CALLS) ?: emptyList()
|
||||
val groundTruthApiCalls = it.additionalList(AIA_GROUND_TRUTH_INTERNAL_API_CALLS) ?: emptyList()
|
||||
val predictedApiCalls = extractPredictedApiCallsFromLookup(it)
|
||||
val groundTruthApiCalls = extractExpectedApiCallsFromLookup(it)
|
||||
val apiRecall = calculateApiRecallForLookupSnippets(predictedApiCalls, groundTruthApiCalls)
|
||||
fileSample.add(apiRecall)
|
||||
coreSample.add(apiRecall)
|
||||
@@ -29,7 +32,7 @@ class ApiRecall : ConfidenceIntervalMetric<Double>() {
|
||||
return fileSample.mean()
|
||||
}
|
||||
|
||||
override fun compute(sample: List<Double>): Double = sample.average()
|
||||
final override fun compute(sample: List<Double>): Double = sample.average()
|
||||
|
||||
private fun calculateApiRecallForLookupSnippets(
|
||||
predictedApiCalls: List<String>,
|
||||
@@ -44,5 +47,36 @@ class ApiRecall : ConfidenceIntervalMetric<Double>() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * [ApiRecall] variant that reads predicted calls from the [AIA_PREDICTED_API_CALLS] entry of a
 * lookup and expected calls from the [AIA_GROUND_TRUTH_INTERNAL_API_CALLS] entry.
 */
class InternalApiRecall : ApiRecall() {
  override val name: String = "API Recall"
  override val description: String = "The fraction of correctly guessed project-defined API calls"

  /** Returns the predicted calls stored on [lookup], or an empty list when the entry is absent. */
  override fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String> =
    lookup.additionalList(AIA_PREDICTED_API_CALLS).orEmpty()

  /** Returns the expected calls stored on [lookup], or an empty list when the entry is absent. */
  override fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String> =
    lookup.additionalList(AIA_GROUND_TRUTH_INTERNAL_API_CALLS).orEmpty()
}
|
||||
|
||||
/**
 * [ApiRecall] variant that reads predicted calls from the [AIA_PREDICTED_EXTERNAL_API_CALLS]
 * entry of a lookup and expected calls from the [AIA_GROUND_TRUTH_EXTERNAL_API_CALLS] entry.
 */
class ExternalApiRecall : ApiRecall() {
  override val name: String = "External API Recall"
  override val description: String = "The fraction of correctly guessed library-defined API calls"

  /** Returns the predicted calls stored on [lookup], or an empty list when the entry is absent. */
  override fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String> =
    lookup.additionalList(AIA_PREDICTED_EXTERNAL_API_CALLS).orEmpty()

  /** Returns the expected calls stored on [lookup], or an empty list when the entry is absent. */
  override fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String> =
    lookup.additionalList(AIA_GROUND_TRUTH_EXTERNAL_API_CALLS).orEmpty()

  companion object {
    /** Key of the lookup entry that holds the predicted external API calls. */
    const val AIA_PREDICTED_EXTERNAL_API_CALLS = "external_api_calls"

    /** Key of the lookup entry that holds the ground-truth external API calls. */
    const val AIA_GROUND_TRUTH_EXTERNAL_API_CALLS = "external_api_calls_gt"
  }
}
|
||||
|
||||
/**
 * Reads the entry stored under [key] in `additionalInfo` as a newline-separated list.
 *
 * @return the non-empty segments of the stored string split on `"\n"`, or `null` when there is
 *   no entry for [key]. The stored value is cast to [String].
 */
internal fun Lookup.additionalList(key: String): List<String>? {
  val stored = additionalInfo[key] ?: return null
  val text = stored as String
  return text.split("\n").filter(String::isNotEmpty)
}
|
||||
|
||||
@@ -112,10 +112,8 @@ fun extractCalledExternalApiMethodsQualifiedNames(psiElement: PsiElement): List<
|
||||
val psiMethodCall = (it as? PsiMethodCallExpression) ?: return@forEach
|
||||
val referenceName = psiMethodCall.methodExpression.referenceName ?: return@forEach
|
||||
val method = it.resolveMethod()
|
||||
if (method != null && (
|
||||
isInternalApiMethod(method, psiElement) ||
|
||||
isFromStandardLibrary(method)
|
||||
)) {
|
||||
if (method != null && (isInternalApiMethod(method, psiElement) ||
|
||||
isFromStandardLibrary(method))) {
|
||||
return@forEach
|
||||
}
|
||||
externalApiMethodsQualifiedNames.add(referenceName)
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
package com.intellij.cce.metric
|
||||
|
||||
import com.intellij.cce.core.Lookup
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
 * Tests for the lookup-extraction functions of [InternalApiRecall]: each function is checked
 * with a populated entry, a missing entry and an empty-string entry.
 */
class InternalApiRecallTest {

  @Test
  fun `extractPredictedApiCallsFromLookup should return list of predicted API calls when present`() {
    val lookup = lookupWith(mapOf("predicted_api_calls" to "call1\ncall2\ncall3"))

    val result = InternalApiRecall().extractPredictedApiCallsFromLookup(lookup)

    assertEquals(listOf("call1", "call2", "call3"), result)
  }

  @Test
  fun `extractPredictedApiCallsFromLookup should return empty list when predicted API calls are absent`() {
    val lookup = lookupWith(emptyMap())

    val result = InternalApiRecall().extractPredictedApiCallsFromLookup(lookup)

    assertEquals(emptyList<String>(), result)
  }

  @Test
  fun `extractPredictedApiCallsFromLookup should return empty list when predicted API calls are empty`() {
    val lookup = lookupWith(mapOf("predicted_api_calls" to ""))

    val result = InternalApiRecall().extractPredictedApiCallsFromLookup(lookup)

    assertEquals(emptyList<String>(), result)
  }

  @Test
  fun `extractExpectedApiCallsFromLookup should return list of expected API calls when present`() {
    val lookup = lookupWith(mapOf("ground_truth_internal_api_calls" to "call1\ncall2\ncall3"))

    val result = InternalApiRecall().extractExpectedApiCallsFromLookup(lookup)

    assertEquals(listOf("call1", "call2", "call3"), result)
  }

  @Test
  fun `extractExpectedApiCallsFromLookup should return empty list when expected API calls are absent`() {
    val lookup = lookupWith(emptyMap())

    val result = InternalApiRecall().extractExpectedApiCallsFromLookup(lookup)

    assertEquals(emptyList<String>(), result)
  }

  @Test
  fun `extractExpectedApiCallsFromLookup should return empty list when expected API calls are empty`() {
    val lookup = lookupWith(mapOf("ground_truth_internal_api_calls" to ""))

    val result = InternalApiRecall().extractExpectedApiCallsFromLookup(lookup)

    assertEquals(emptyList<String>(), result)
  }

  /**
   * Builds the [Lookup] fixture shared by every test; only [additionalInfo] varies between cases,
   * all other constructor arguments are the fixed values the tests have always used.
   */
  private fun lookupWith(additionalInfo: Map<String, String>): Lookup = Lookup(
    prefix = "test",
    offset = 0,
    suggestions = listOf(),
    latency = 10L,
    isNew = false,
    additionalInfo = additionalInfo
  )
}
|
||||
Reference in New Issue
Block a user