[aia] LLM-17290 add external api recall calculation, adapt chat code generation feature to a new report type

GitOrigin-RevId: 96f0f2fa5994a24b61d30a0824f447e35417d121
This commit is contained in:
Nikolai.Palchikov
2025-06-04 10:42:56 +02:00
committed by intellij-monorepo-bot
parent e8e0390d34
commit e2cc44b1ca
4 changed files with 190 additions and 17 deletions

View File

@@ -2,6 +2,8 @@ package com.intellij.cce.evaluation.data
import com.intellij.cce.evaluable.*
import com.intellij.cce.metric.*
import com.intellij.cce.metric.ExternalApiRecall.Companion.AIA_GROUND_TRUTH_EXTERNAL_API_CALLS
import com.intellij.cce.metric.ExternalApiRecall.Companion.AIA_PREDICTED_EXTERNAL_API_CALLS
import com.intellij.cce.metric.context.MeanContextLines
import com.intellij.cce.metric.context.MeanContextSize
@@ -194,6 +196,16 @@ object Analysis {
),
)
/**
 * Evaluation-data description for the ground-truth external API calls, typed as a list of strings.
 *
 * The data is located through [DataPlacement.AdditionalConcatenatedLines] under the
 * [AIA_GROUND_TRUTH_EXTERNAL_API_CALLS] key and is presented in the
 * [PresentationCategory.ANALYSIS] category using [DataRenderer.Lines].
 */
val GROUND_TRUTH_EXTERNAL_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
name = "Ground truth external API calls",
description = "Bind with the list of initial external API calls",
// NOTE(review): presumably one API call per line in the stored value - confirm against the placement implementation.
DataPlacement.AdditionalConcatenatedLines(AIA_GROUND_TRUTH_EXTERNAL_API_CALLS),
presentation = EvalDataPresentation(
PresentationCategory.ANALYSIS,
renderer = DataRenderer.Lines,
)
)
val PREDICTED_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
name = "Predicted internal API calls",
description = "Bind with the list of predicted internal API calls",
@@ -204,6 +216,16 @@ object Analysis {
),
)
/**
 * Evaluation-data description for the predicted external API calls, typed as a list of strings.
 *
 * The data is located through [DataPlacement.AdditionalConcatenatedLines] under the
 * [AIA_PREDICTED_EXTERNAL_API_CALLS] key and is presented in the
 * [PresentationCategory.ANALYSIS] category using [DataRenderer.Lines].
 */
val PREDICTED_EXTERNAL_API_CALLS: TrivialEvalData<List<String>> = EvalDataDescription(
name = "Predicted external API calls",
description = "Bind with the list of predicted external API calls",
// NOTE(review): presumably one API call per line in the stored value - confirm against the placement implementation.
DataPlacement.AdditionalConcatenatedLines(AIA_PREDICTED_EXTERNAL_API_CALLS),
presentation = EvalDataPresentation(
PresentationCategory.ANALYSIS,
renderer = DataRenderer.Lines,
)
)
val FAILED_FILE_VALIDATIONS: TrivialEvalData<List<String>> = EvalDataDescription(
name = "Failed file validations",
description = "Bind with failed file validations",
@@ -330,14 +352,23 @@ object Metrics {
dependencies = MetricDependencies(Analysis.ERASED_APIS)
) { PreservedApi() }
val API_RECALL: EvalMetric = EvalMetric(
val INTERNAL_API_RECALL: EvalMetric = EvalMetric(
threshold = 1.0,
dependencies = MetricDependencies(
Analysis.GROUND_TRUTH_API_CALLS,
Analysis.PREDICTED_API_CALLS,
DataRenderer.TextDiff
) { initial, result -> TextUpdate(initial.sorted().joinToString("\n"), result.sorted().joinToString("\n")) }
) { ApiRecall() }
) { InternalApiRecall() }
/**
 * Metric entry backed by [ExternalApiRecall], with a threshold of `1.0`.
 *
 * It depends on the ground-truth and predicted external API call lists. For presentation the two
 * lists are each sorted and joined with newlines, then shown as a text diff
 * (ground truth first, prediction second).
 */
val EXTERNAL_API_RECALL: EvalMetric = EvalMetric(
    threshold = 1.0,
    dependencies = MetricDependencies(
        Analysis.GROUND_TRUTH_EXTERNAL_API_CALLS,
        Analysis.PREDICTED_EXTERNAL_API_CALLS,
        DataRenderer.TextDiff
    ) { groundTruthCalls, predictedCalls ->
        // Sort both sides so the diff reflects set differences rather than ordering.
        val groundTruthText = groundTruthCalls.sorted().joinToString("\n")
        val predictedText = predictedCalls.sorted().joinToString("\n")
        TextUpdate(groundTruthText, predictedText)
    }
) { ExternalApiRecall() }
val FILE_VALIDATIONS_SUCCESS: EvalMetric = EvalMetric(
threshold = 1.0,

View File

@@ -6,22 +6,25 @@ import com.intellij.cce.evaluable.AIA_GROUND_TRUTH_INTERNAL_API_CALLS
import com.intellij.cce.evaluable.AIA_PREDICTED_API_CALLS
import com.intellij.cce.metric.util.Sample
class ApiRecall : ConfidenceIntervalMetric<Double>() {
override val name: String = "API Recall"
override val description: String = "The fraction of correctly guessed project-defined API calls"
override val showByDefault: Boolean = true
override val valueType = MetricValueType.DOUBLE
override val value: Double
abstract class ApiRecall : ConfidenceIntervalMetric<Double>() {
final override val showByDefault: Boolean = true
final override val valueType = MetricValueType.DOUBLE
final override val value: Double
get() = compute(sample)
override val supportsIndividualScores: Boolean = true
abstract fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String>
abstract fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String>
@Suppress("UNCHECKED_CAST")
override fun evaluate(sessions: List<Session>): Number {
final override fun evaluate(sessions: List<Session>): Number {
val fileSample = Sample()
sessions
.flatMap { it.lookups }
.forEach {
val predictedApiCalls = it.additionalList(AIA_PREDICTED_API_CALLS) ?: emptyList()
val groundTruthApiCalls = it.additionalList(AIA_GROUND_TRUTH_INTERNAL_API_CALLS) ?: emptyList()
val predictedApiCalls = extractPredictedApiCallsFromLookup(it)
val groundTruthApiCalls = extractExpectedApiCallsFromLookup(it)
val apiRecall = calculateApiRecallForLookupSnippets(predictedApiCalls, groundTruthApiCalls)
fileSample.add(apiRecall)
coreSample.add(apiRecall)
@@ -29,7 +32,7 @@ class ApiRecall : ConfidenceIntervalMetric<Double>() {
return fileSample.mean()
}
override fun compute(sample: List<Double>): Double = sample.average()
final override fun compute(sample: List<Double>): Double = sample.average()
private fun calculateApiRecallForLookupSnippets(
predictedApiCalls: List<String>,
@@ -44,5 +47,36 @@ class ApiRecall : ConfidenceIntervalMetric<Double>() {
}
}
/**
 * [ApiRecall] specialisation for project-defined (internal) API calls.
 *
 * Predicted calls are read from the lookup entry keyed by [AIA_PREDICTED_API_CALLS] and expected
 * calls from the entry keyed by [AIA_GROUND_TRUTH_INTERNAL_API_CALLS]; a missing entry is treated
 * as an empty list.
 */
class InternalApiRecall : ApiRecall() {
    override val name: String = "API Recall"
    override val description: String = "The fraction of correctly guessed project-defined API calls"

    /** Returns the predicted internal API calls stored on [lookup], or an empty list if none are stored. */
    override fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String> =
        lookup.additionalList(AIA_PREDICTED_API_CALLS).orEmpty()

    /** Returns the ground-truth internal API calls stored on [lookup], or an empty list if none are stored. */
    override fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String> =
        lookup.additionalList(AIA_GROUND_TRUTH_INTERNAL_API_CALLS).orEmpty()
}
/**
 * [ApiRecall] specialisation for library-defined (external) API calls.
 *
 * Predicted and expected calls are read from the lookup entries keyed by
 * [AIA_PREDICTED_EXTERNAL_API_CALLS] and [AIA_GROUND_TRUTH_EXTERNAL_API_CALLS]; a missing entry is
 * treated as an empty list.
 */
class ExternalApiRecall : ApiRecall() {
    override val name: String = "External API Recall"
    override val description: String = "The fraction of correctly guessed library-defined API calls"

    /** Returns the predicted external API calls stored on [lookup], or an empty list if none are stored. */
    override fun extractPredictedApiCallsFromLookup(lookup: Lookup): List<String> =
        lookup.additionalList(AIA_PREDICTED_EXTERNAL_API_CALLS).orEmpty()

    /** Returns the ground-truth external API calls stored on [lookup], or an empty list if none are stored. */
    override fun extractExpectedApiCallsFromLookup(lookup: Lookup): List<String> =
        lookup.additionalList(AIA_GROUND_TRUTH_EXTERNAL_API_CALLS).orEmpty()

    companion object {
        /** Additional-info key holding the predicted external API calls. */
        const val AIA_PREDICTED_EXTERNAL_API_CALLS = "external_api_calls"

        /** Additional-info key holding the ground-truth external API calls. */
        const val AIA_GROUND_TRUTH_EXTERNAL_API_CALLS = "external_api_calls_gt"
    }
}
/**
 * Reads the additional-info entry stored under [key] and splits it into its non-empty lines.
 *
 * The entry is cast to [String] (so a non-string value fails with a [ClassCastException]),
 * split on `"\n"`, and empty segments are dropped.
 *
 * @param key name of the entry in [Lookup.additionalInfo]
 * @return the non-empty lines of the entry, an empty list when the entry is an empty string,
 *   or `null` when there is no entry for [key]
 */
internal fun Lookup.additionalList(key: String): List<String>? =
    additionalInfo[key]?.let { it as String }?.split("\n")?.filter { it.isNotEmpty() }

View File

@@ -112,10 +112,8 @@ fun extractCalledExternalApiMethodsQualifiedNames(psiElement: PsiElement): List<
val psiMethodCall = (it as? PsiMethodCallExpression) ?: return@forEach
val referenceName = psiMethodCall.methodExpression.referenceName ?: return@forEach
val method = it.resolveMethod()
if (method != null && (
isInternalApiMethod(method, psiElement) ||
isFromStandardLibrary(method)
)) {
if (method != null && (isInternalApiMethod(method, psiElement) ||
isFromStandardLibrary(method))) {
return@forEach
}
externalApiMethodsQualifiedNames.add(referenceName)

View File

@@ -0,0 +1,110 @@
package com.intellij.cce.metric
import com.intellij.cce.core.Lookup
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
/**
 * Verifies how [InternalApiRecall] extracts predicted and expected API calls from a [Lookup]:
 * a newline-separated entry yields its items, while a missing or empty entry yields an empty list.
 */
class InternalApiRecallTest {
    // JUnit creates a fresh test-class instance per test method, so each test gets its own metric.
    private val metric = InternalApiRecall()

    @Test
    fun `extractPredictedApiCallsFromLookup should return list of predicted API calls when present`() {
        val lookup = lookupWith(mapOf(PREDICTED_KEY to THREE_CALLS_RAW))

        assertEquals(THREE_CALLS, metric.extractPredictedApiCallsFromLookup(lookup))
    }

    @Test
    fun `extractPredictedApiCallsFromLookup should return empty list when predicted API calls are absent`() {
        val lookup = lookupWith(emptyMap())

        assertEquals(emptyList<String>(), metric.extractPredictedApiCallsFromLookup(lookup))
    }

    @Test
    fun `extractPredictedApiCallsFromLookup should return empty list when predicted API calls are empty`() {
        val lookup = lookupWith(mapOf(PREDICTED_KEY to ""))

        assertEquals(emptyList<String>(), metric.extractPredictedApiCallsFromLookup(lookup))
    }

    @Test
    fun `extractExpectedApiCallsFromLookup should return list of expected API calls when present`() {
        val lookup = lookupWith(mapOf(GROUND_TRUTH_KEY to THREE_CALLS_RAW))

        assertEquals(THREE_CALLS, metric.extractExpectedApiCallsFromLookup(lookup))
    }

    @Test
    fun `extractExpectedApiCallsFromLookup should return empty list when expected API calls are absent`() {
        val lookup = lookupWith(emptyMap())

        assertEquals(emptyList<String>(), metric.extractExpectedApiCallsFromLookup(lookup))
    }

    @Test
    fun `extractExpectedApiCallsFromLookup should return empty list when expected API calls are empty`() {
        val lookup = lookupWith(mapOf(GROUND_TRUTH_KEY to ""))

        assertEquals(emptyList<String>(), metric.extractExpectedApiCallsFromLookup(lookup))
    }

    /** Builds a [Lookup] whose only varying part is its [additionalInfo] map. */
    private fun lookupWith(additionalInfo: Map<String, String>): Lookup = Lookup(
        prefix = "test",
        offset = 0,
        suggestions = emptyList(),
        latency = 10L,
        isNew = false,
        additionalInfo = additionalInfo
    )

    private companion object {
        const val PREDICTED_KEY = "predicted_api_calls"
        const val GROUND_TRUTH_KEY = "ground_truth_internal_api_calls"
        const val THREE_CALLS_RAW = "call1\ncall2\ncall3"
        val THREE_CALLS = listOf("call1", "call2", "call3")
    }
}