Coverage for langsmith/evaluation/string_evaluator.py: 0%

26 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-12-11 16:15 -0800

1"""This module contains the StringEvaluator class.""" 

2 

3import uuid 

4from typing import Callable, Optional 

5 

6from pydantic import BaseModel 

7 

8from langsmith.evaluation.evaluator import EvaluationResult, RunEvaluator 

9from langsmith.schemas import Example, Run 

10 

11 

class StringEvaluator(RunEvaluator, BaseModel):
    """Run evaluator that delegates scoring to a user-supplied grading function.

    The run's input value, its predicted output value, and (when available)
    the example's reference answer are looked up by configurable keys and
    handed to ``grading_function``.
    """

    evaluation_name: Optional[str] = None
    """The name of the evaluation, such as `'Accuracy'` or `'Salience'`."""
    input_key: str = "input"
    """The key in the run inputs from which to extract the input string."""
    prediction_key: str = "output"
    """The key in the run outputs from which to extract the prediction string."""
    answer_key: Optional[str] = "output"
    """The key in the example outputs that holds the answer string."""
    grading_function: Callable[[str, str, Optional[str]], dict]
    """Function that grades the run output against the example output."""

    def evaluate_run(
        self,
        run: Run,
        example: Optional[Example] = None,
        evaluator_run_id: Optional[uuid.UUID] = None,
    ) -> EvaluationResult:
        """Grade a single run and wrap the grade in an ``EvaluationResult``.

        Args:
            run: The run to grade. Its ``inputs`` must contain ``input_key``
                and its ``outputs`` must contain ``prediction_key``.
            example: Optional reference example. The answer is read from its
                ``outputs`` under ``answer_key``.
            evaluator_run_id: Not used by this implementation.

        Returns:
            An ``EvaluationResult`` built from the grading function's result,
            with ``key`` set to ``evaluation_name`` unless the grading result
            supplies its own ``key``.

        Raises:
            ValueError: If ``run.outputs`` is ``None``.
            KeyError: If ``input_key`` or ``prediction_key`` is missing.
        """
        predictions = run.outputs
        if predictions is None:
            raise ValueError("Run outputs cannot be None.")

        # The reference stays None when there is no usable example, the
        # example has no outputs, or answer lookup is disabled.
        reference = None
        if example and example.outputs is not None and self.answer_key is not None:
            reference = example.outputs.get(self.answer_key)

        question = run.inputs[self.input_key]
        prediction = predictions[self.prediction_key]
        graded = self.grading_function(question, prediction, reference)

        # Entries from the grading result come last, so they win on collision.
        payload = {"key": self.evaluation_name, **graded}
        return EvaluationResult(**payload)