Coverage for langsmith/evaluation/string_evaluator.py: 0%
26 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-12-11 16:15 -0800
« prev ^ index » next coverage.py v7.10.1, created at 2025-12-11 16:15 -0800
1"""This module contains the StringEvaluator class."""
3import uuid
4from typing import Callable, Optional
6from pydantic import BaseModel
8from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator
9from langsmith.schemas import Example, Run
12class StringEvaluator(RunEvaluator, BaseModel):
13 """Grades the run's string input, output, and optional answer."""
15 evaluation_name: Optional[str] = None
16 """The name evaluation, such as `'Accuracy'` or `'Salience'`."""
17 input_key: str = "input"
18 """The key in the run inputs to extract the input string."""
19 prediction_key: str = "output"
20 """The key in the run outputs to extra the prediction string."""
21 answer_key: Optional[str] = "output"
22 """The key in the example outputs the answer string."""
23 grading_function: Callable[[str, str, Optional[str]], dict]
24 """Function that grades the run output against the example output."""
26 def evaluate_run(
27 self,
28 run: Run,
29 example: Optional[Example] = None,
30 evaluator_run_id: Optional[uuid.UUID] = None,
31 ) -> EvaluationResult:
32 """Evaluate a single run."""
33 if run.outputs is None:
34 raise ValueError("Run outputs cannot be None.")
35 if not example or example.outputs is None or self.answer_key is None:
36 answer = None
37 else:
38 answer = example.outputs.get(self.answer_key)
39 run_input = run.inputs[self.input_key]
40 run_output = run.outputs[self.prediction_key]
41 grading_results = self.grading_function(run_input, run_output, answer)
42 return EvaluationResult(**{"key": self.evaluation_name, **grading_results})