Coverage for langsmith/schemas.py: 3%

823 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-12-11 16:15 -0800

1"""Schemas for the LangSmith API.""" 

2 

3from __future__ import annotations 

4 

5from collections.abc import Iterator 

6from datetime import datetime, timedelta, timezone 

7from decimal import Decimal 

8from enum import Enum 

9from typing import ( 

10 Annotated, 

11 Any, 

12 NamedTuple, 

13 Optional, 

14 Protocol, 

15 Union, 

16 runtime_checkable, 

17) 

18from uuid import UUID 

19 

20from typing_extensions import NotRequired, TypedDict 

21 

22try: 

23 from pydantic.v1 import ( 

24 BaseModel, 

25 Field, # type: ignore[import] 

26 PrivateAttr, 

27 StrictBool, 

28 StrictFloat, 

29 StrictInt, 

30 ) 

31except ImportError: 

32 from pydantic import ( # type: ignore[assignment] 

33 BaseModel, 

34 Field, 

35 PrivateAttr, 

36 StrictBool, 

37 StrictFloat, 

38 StrictInt, 

39 ) 

40 

41from pathlib import Path 

42 

43from typing_extensions import Literal 

44 

# Types accepted for a feedback score: a strict bool, int or float, or None.
SCORE_TYPE = Optional[Union[StrictBool, StrictInt, StrictFloat]]
# Types accepted for a feedback value: a dict, a string, or None.
VALUE_TYPE = Optional[Union[dict, str]]

47 

48 

class Attachment(NamedTuple):
    """A `(mime_type, data)` pair that is stored as an attachment when used as a type.

    Examples:
        ```python
        from langsmith import traceable
        from langsmith.schemas import Attachment


        @traceable
        def my_function(bar: int, my_val: Attachment):
            # my_val will be stored as an attachment
            # bar will be stored as inputs
            return bar
        ```
    """

    # MIME type describing the payload.
    mime_type: str
    # Either the raw bytes or a path to a file holding them.
    data: Union[bytes, Path]


Attachments = dict[str, Union[tuple[str, bytes], Attachment, tuple[str, Path]]]
"""Attachments associated with the run, keyed by name.

Each value is an `Attachment`, a `(mime_type, bytes)` tuple,
or a `(mime_type, file_path)` tuple.
"""

75 

76 

@runtime_checkable
class BinaryIOLike(Protocol):
    """Structural type for binary IO-like objects.

    Being `runtime_checkable`, it can be used with `isinstance`; an object
    matches when it provides `read`, `seek` and `getvalue`.
    """

    def read(self, size: int = -1) -> bytes:
        """Read bytes from the object."""
        ...

    def seek(self, offset: int, whence: int = 0) -> int:
        """Move the position within the object."""
        ...

    def getvalue(self) -> bytes:
        """Return the object's contents as bytes."""
        ...

92 

93 

class ExampleBase(BaseModel):
    """Fields shared by the example models."""

    dataset_id: UUID
    inputs: Optional[dict[str, Any]] = Field(default=None)
    outputs: Optional[dict[str, Any]] = Field(default=None)
    metadata: Optional[dict[str, Any]] = Field(default=None)

    class Config:
        """Pydantic configuration for the schema."""

        frozen = True
        arbitrary_types_allowed = True

107 

108 

class _AttachmentDict(TypedDict):
    """Dictionary form of an attachment, with the same keys as `Attachment`."""

    mime_type: str
    data: Union[bytes, Path]


# Every shape accepted where an attachment is expected: the named tuple,
# the dict form, or a plain `(mime_type, bytes_or_path)` tuple.
_AttachmentLike = Union[
    Attachment,
    _AttachmentDict,
    tuple[str, bytes],
    tuple[str, Path],
]

117 

118 

class ExampleCreate(BaseModel):
    """Payload for uploading an example, optionally with attachments."""

    id: Optional[UUID]
    # Defaults to the current UTC time when not supplied.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    inputs: Optional[dict[str, Any]] = Field(default=None)
    outputs: Optional[dict[str, Any]] = Field(default=None)
    metadata: Optional[dict[str, Any]] = Field(default=None)
    split: Optional[Union[str, list[str]]] = None
    attachments: Optional[dict[str, _AttachmentLike]] = None
    use_source_run_io: bool = False
    use_source_run_attachments: Optional[list[str]] = None
    source_run_id: Optional[UUID] = None

    def __init__(self, **data):
        """Build the model from keyword arguments."""
        super().__init__(**data)


# Second name bound to the same class.
ExampleUploadWithAttachments = ExampleCreate

139 

140 

class ExampleUpsertWithAttachments(ExampleCreate):
    """`ExampleCreate` payload that additionally requires a `dataset_id`."""

    dataset_id: UUID

145 

146 

class AttachmentInfo(TypedDict):
    """Description of a single stored attachment."""

    # Presigned URL for the attachment.
    presigned_url: str
    # Binary IO-like object for reading the attachment data.
    reader: BinaryIOLike
    # MIME type of the attachment, when known.
    mime_type: Optional[str]

153 

154 

class Example(ExampleBase):
    """Example model."""

    id: UUID
    # Defaults to the Unix epoch (UTC) when not supplied.
    created_at: datetime = Field(
        default_factory=lambda: datetime.fromtimestamp(0, tz=timezone.utc)
    )
    # Defaults to the all-zero UUID when not supplied.
    dataset_id: UUID = Field(default=UUID("00000000-0000-0000-0000-000000000000"))
    modified_at: Optional[datetime] = Field(default=None)
    source_run_id: Optional[UUID] = None
    attachments: Optional[dict[str, AttachmentInfo]] = Field(default=None)
    """Maps attachment names to `AttachmentInfo` entries
    (presigned URL, reader and MIME type)."""
    _host_url: Optional[str] = PrivateAttr(default=None)
    _tenant_id: Optional[UUID] = PrivateAttr(default=None)

    def __init__(
        self,
        _host_url: Optional[str] = None,
        _tenant_id: Optional[UUID] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize an Example object.

        Args:
            _host_url: Base URL used to build `url`; kept as a private attribute.
            _tenant_id: Tenant ID used to build `url`; kept as a private attribute.
            **kwargs: Model field values forwarded to the base initializer.
        """
        super().__init__(**kwargs)
        self._host_url = _host_url
        self._tenant_id = _tenant_id

    @property
    def url(self) -> Optional[str]:
        """URL of this example within the app, or `None` when no host URL is set."""
        if not self._host_url:
            return None
        path = f"/datasets/{self.dataset_id}/e/{self.id}"
        if self._tenant_id:
            return f"{self._host_url}/o/{str(self._tenant_id)}{path}"
        return f"{self._host_url}{path}"

    def __repr__(self):
        """Return a string representation of the Example object."""
        return f"{self.__class__}(id={self.id}, dataset_id={self.dataset_id}, link='{self.url}')"

195 

196 

class ExampleSearch(ExampleBase):
    """Example as returned by a search; adds the example `id` to the base fields."""

    id: UUID

201 

202 

class AttachmentsOperations(BaseModel):
    """Rename and retain operations to apply to attachments."""

    rename: dict[str, str] = Field(
        default_factory=dict,
        description="Mapping of old attachment names to new names",
    )
    retain: list[str] = Field(
        default_factory=list,
        description="List of attachment names to keep",
    )

212 

213 

class ExampleUpdate(BaseModel):
    """Payload for updating an example, optionally with attachments."""

    id: UUID
    dataset_id: Optional[UUID] = None
    inputs: Optional[dict[str, Any]] = Field(default=None)
    outputs: Optional[dict[str, Any]] = Field(default=None)
    metadata: Optional[dict[str, Any]] = Field(default=None)
    split: Optional[Union[str, list[str]]] = None
    attachments: Optional[Attachments] = None
    attachments_operations: Optional[AttachmentsOperations] = None

    class Config:
        """Pydantic configuration for the schema."""

        frozen = True

    def __init__(self, **data):
        """Build the model from keyword arguments."""
        super().__init__(**data)


# Second name bound to the same class.
ExampleUpdateWithAttachments = ExampleUpdate

237 

238 

class DataType(str, Enum):
    """Dataset data types; members are also `str` instances."""

    kv = "kv"
    llm = "llm"
    chat = "chat"

245 

246 

class DatasetBase(BaseModel):
    """Fields shared by the dataset models."""

    name: str
    description: Optional[str] = None
    data_type: Optional[DataType] = None

    class Config:
        """Pydantic configuration for the schema."""

        frozen = True

258 

259 

# Names of the known dataset transformation types.
DatasetTransformationType = Literal[
    "remove_system_messages",
    "convert_to_openai_message",
    "convert_to_openai_tool",
    "remove_extra_fields",
    "extract_tools_from_run",
]


class DatasetTransformation(TypedDict, total=False):
    """Schema for a dataset transformation; every key is optional."""

    path: list[str]
    # One of the known transformation names, or any other string.
    transformation_type: Union[DatasetTransformationType, str]

274 

275 

class Dataset(DatasetBase):
    """Dataset ORM model."""

    id: UUID
    created_at: datetime
    modified_at: Optional[datetime] = Field(default=None)
    example_count: Optional[int] = None
    session_count: Optional[int] = None
    last_session_start_time: Optional[datetime] = None
    inputs_schema: Optional[dict[str, Any]] = None
    outputs_schema: Optional[dict[str, Any]] = None
    transformations: Optional[list[DatasetTransformation]] = None
    metadata: Optional[dict[str, Any]] = None
    _host_url: Optional[str] = PrivateAttr(default=None)
    _tenant_id: Optional[UUID] = PrivateAttr(default=None)
    _public_path: Optional[str] = PrivateAttr(default=None)

    def __init__(
        self,
        _host_url: Optional[str] = None,
        _tenant_id: Optional[UUID] = None,
        _public_path: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize a Dataset object.

        Args:
            _host_url: Base URL used to build `url`; kept as a private attribute.
            _tenant_id: Tenant ID used to build `url`; kept as a private attribute.
            _public_path: Path appended to the host URL in `url` when set.
            **kwargs: Model field values. The keys `inputs_schema_definition`
                and `outputs_schema_definition` are accepted and stored as
                `inputs_schema` and `outputs_schema`.
        """
        # Move values passed under the "*_definition" keys to the field names.
        renames = (
            ("inputs_schema_definition", "inputs_schema"),
            ("outputs_schema_definition", "outputs_schema"),
        )
        for source_key, field_name in renames:
            if source_key in kwargs:
                kwargs[field_name] = kwargs.pop(source_key)

        super().__init__(**kwargs)
        self._host_url = _host_url
        self._tenant_id = _tenant_id
        self._public_path = _public_path

    @property
    def url(self) -> Optional[str]:
        """URL of this dataset within the app, or `None` when no host URL is set.

        The public path takes precedence over the tenant-scoped URL.
        """
        if not self._host_url:
            return None
        if self._public_path:
            return f"{self._host_url}{self._public_path}"
        if self._tenant_id:
            return f"{self._host_url}/o/{str(self._tenant_id)}/datasets/{self.id}"
        return f"{self._host_url}/datasets/{self.id}"

322 

323 

class DatasetVersion(BaseModel):
    """A version of a dataset, identified by its `as_of` timestamp."""

    tags: Optional[list[str]] = None
    as_of: datetime

329 

330 

def _default_extra():
    """Return a new dict containing an empty `metadata` dict."""
    return dict(metadata={})

333 

334 

class RunBase(BaseModel):
    """Base Run schema.

    A Run is a span representing a single unit of work or operation within your
    LLM app: for example a single call to an LLM or chain, a prompt formatting
    call, or a runnable lambda invocation. If you are familiar with
    OpenTelemetry, you can think of a run as a span.
    """

    id: UUID
    """Unique identifier for the run."""

    name: str
    """Human-readable name for the run."""

    start_time: datetime
    """Start time of the run."""

    run_type: str
    """The type of run, such as tool, chain, llm, retriever,
    embedding, prompt, parser."""

    end_time: Optional[datetime] = None
    """End time of the run, if applicable."""

    extra: Optional[dict] = Field(default_factory=_default_extra)
    """Additional metadata or settings related to the run."""

    error: Optional[str] = None
    """Error message, if the run encountered any issues."""

    serialized: Optional[dict] = None
    """Serialized object that executed the run for potential reuse."""

    events: Optional[list[dict]] = None
    """List of events associated with the run, like
    start and end events."""

    inputs: dict = Field(default_factory=dict)
    """Inputs used for the run."""

    outputs: Optional[dict] = None
    """Outputs generated by the run, if any."""

    reference_example_id: Optional[UUID] = None
    """Reference to an example that this run may be based on."""

    parent_run_id: Optional[UUID] = None
    """Identifier for a parent run, if this run is a sub-run."""

    tags: Optional[list[str]] = None
    """Tags for categorizing or annotating the run."""

    attachments: Union[Attachments, dict[str, AttachmentInfo]] = Field(
        default_factory=dict
    )
    """Attachments associated with the run.

    Each entry is a tuple of `(mime_type, bytes)`.
    """

    @property
    def metadata(self) -> dict[str, Any]:
        """Retrieve the metadata dict stored under `extra`.

        A missing `extra` dict or `metadata` entry is created in place, so the
        returned dict is always the one held by the run.
        """
        if self.extra is None:
            self.extra = {}
        return self.extra.setdefault("metadata", {})

    @property
    def revision_id(self) -> Optional[UUID]:
        """Retrieve the `revision_id` entry of the metadata (if any)."""
        return self.metadata.get("revision_id")

    @property
    def latency(self) -> Optional[float]:
        """Seconds between `start_time` and `end_time`, or `None` if not ended."""
        if self.end_time is not None:
            return (self.end_time - self.start_time).total_seconds()
        return None

    def __repr__(self):
        """Return a string representation of the RunBase object."""
        return f"{self.__class__}(id={self.id}, name='{self.name}', run_type='{self.run_type}')"

    class Config:
        """Pydantic configuration for the schema."""

        arbitrary_types_allowed = True

423 

424 

class Run(RunBase):
    """Run schema when loading from the DB."""

    session_id: Optional[UUID] = None
    """The project ID this run belongs to."""
    child_run_ids: Optional[list[UUID]] = None
    """Deprecated: The child run IDs of this run."""
    child_runs: Optional[list[Run]] = None
    """The child runs of this run, if instructed to load using the client.
    These are not populated by default, as it is a heavier query to make."""
    feedback_stats: Optional[dict[str, Any]] = None
    """Feedback stats for this run."""
    app_path: Optional[str] = None
    """Relative URL path of this run within the app."""
    manifest_id: Optional[UUID] = None
    """Unique ID of the serialized object for this run."""
    status: Optional[str] = None
    """Status of the run (e.g., 'success')."""
    prompt_tokens: Optional[int] = None
    """Number of tokens used for the prompt."""
    completion_tokens: Optional[int] = None
    """Number of tokens generated as output."""
    total_tokens: Optional[int] = None
    """Total tokens for prompt and completion."""
    prompt_token_details: Optional[dict[str, int]] = None
    """Breakdown of prompt (input) token counts.

    Does *not* need to sum to full prompt token count.
    """
    completion_token_details: Optional[dict[str, int]] = None
    """Breakdown of completion (output) token counts.

    Does *not* need to sum to full completion token count.
    """
    first_token_time: Optional[datetime] = None
    """Time the first token was processed."""
    total_cost: Optional[Decimal] = None
    """The total estimated LLM cost."""
    prompt_cost: Optional[Decimal] = None
    """The estimated cost associated with the prompt (input) tokens."""
    completion_cost: Optional[Decimal] = None
    """The estimated cost associated with the completion tokens."""
    prompt_cost_details: Optional[dict[str, Decimal]] = None
    """Breakdown of prompt (input) token costs.

    Does *not* need to sum to full prompt token cost.
    """
    completion_cost_details: Optional[dict[str, Decimal]] = None
    """Breakdown of completion (output) token costs.

    Does *not* need to sum to full completion token cost.
    """
    parent_run_ids: Optional[list[UUID]] = None
    """List of parent run IDs."""
    trace_id: UUID
    """Unique ID assigned to every run within this nested trace."""
    dotted_order: str = Field(default="")
    """Dotted order for the run.

    This is a string composed of {time}{run-uuid}.* so that a trace can be
    sorted in the order it was executed.

    Example:
        - Parent: 20230914T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8
        - Children:
            - 20230914T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8.20230914T223155649Z809ed3a2-0172-4f4d-8a02-a64e9b7a0f8a
            - 20230915T223155647Z1b64098b-4ab7-43f6-afee-992304f198d8.20230914T223155650Zc8d9f4c5-6c5a-4b2d-9b1c-3d9d7a7c5c7c
    """  # noqa: E501
    in_dataset: Optional[bool] = None
    """Whether this run is in a dataset."""
    _host_url: Optional[str] = PrivateAttr(default=None)

    def __init__(self, _host_url: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize a Run object.

        Args:
            _host_url: Base URL used to build `url`; kept as a private attribute.
            **kwargs: Model field values. A missing or falsy `trace_id` falls
                back to the value passed as `id`; a missing or falsy `inputs`
                becomes an empty dict.
        """
        if not kwargs.get("trace_id"):
            # The fallback must come after the unpacking: an explicit falsy
            # trace_id (e.g. None) in kwargs would otherwise overwrite it.
            kwargs = {**kwargs, "trace_id": kwargs.get("id")}
        inputs = kwargs.pop("inputs", None) or {}
        super().__init__(**kwargs, inputs=inputs)
        self._host_url = _host_url
        # Only a run without a parent can derive its own dotted order.
        if not self.dotted_order.strip() and not self.parent_run_id:
            self.dotted_order = f"{self.start_time.isoformat()}{self.id}"

    @property
    def url(self) -> Optional[str]:
        """URL of this run within the app.

        Returns `None` unless both the host URL and `app_path` are set.
        """
        if self._host_url and self.app_path:
            return f"{self._host_url}{self.app_path}"
        return None

    @property
    def input_tokens(self) -> int | None:
        """Alias for prompt_tokens."""
        return self.prompt_tokens

    @property
    def output_tokens(self) -> int | None:
        """Alias for completion_tokens."""
        return self.completion_tokens

    @property
    def input_cost(self) -> Decimal | None:
        """Alias for prompt_cost."""
        return self.prompt_cost

    @property
    def output_cost(self) -> Decimal | None:
        """Alias for completion_cost."""
        return self.completion_cost

    @property
    def input_token_details(self) -> dict[str, int] | None:
        """Alias for prompt_token_details."""
        return self.prompt_token_details

    @property
    def output_token_details(self) -> dict[str, int] | None:
        """Alias for completion_token_details."""
        return self.completion_token_details

    @property
    def input_cost_details(self) -> dict[str, Decimal] | None:
        """Alias for prompt_cost_details."""
        return self.prompt_cost_details

    @property
    def output_cost_details(self) -> dict[str, Decimal] | None:
        """Alias for completion_cost_details."""
        return self.completion_cost_details

553 

554 

class RunTypeEnum(str, Enum):
    """(Deprecated) Run types as an enum; use the plain string instead."""

    tool = "tool"
    chain = "chain"
    llm = "llm"
    retriever = "retriever"
    embedding = "embedding"
    prompt = "prompt"
    parser = "parser"

565 

566 

class RunLikeDict(TypedDict, total=False):
    """Dictionary shaped like a run, for type hints; every key is optional."""

    name: str
    run_type: RunTypeEnum
    start_time: datetime
    inputs: Optional[dict]
    outputs: Optional[dict]
    end_time: Optional[datetime]
    extra: Optional[dict]
    error: Optional[str]
    serialized: Optional[dict]
    parent_run_id: Optional[UUID]
    manifest_id: Optional[UUID]
    events: Optional[list[dict]]
    tags: Optional[list[str]]
    inputs_s3_urls: Optional[dict]
    outputs_s3_urls: Optional[dict]
    id: Optional[UUID]
    session_id: Optional[UUID]
    session_name: Optional[str]
    reference_example_id: Optional[UUID]
    input_attachments: Optional[dict]
    output_attachments: Optional[dict]
    trace_id: UUID
    dotted_order: str
    attachments: Attachments

594 

595 

class RunWithAnnotationQueueInfo(RunBase):
    """Run schema extended with annotation queue timestamps."""

    last_reviewed_time: Optional[datetime] = None
    """When this run was last reviewed."""
    added_at: Optional[datetime] = None
    """When this run was added to the queue."""

603 

604 

class FeedbackSourceBase(BaseModel):
    """Base class for feedback sources.

    A feedback source records where feedback came from: the API, a model,
    a human labeler, etc.
    """

    type: str
    """The type of the feedback source."""
    metadata: Optional[dict[str, Any]] = Field(default_factory=dict)
    """Additional metadata for the feedback source."""
    user_id: Optional[Union[UUID, str]] = None
    """The user ID associated with the feedback source."""
    user_name: Optional[str] = None
    """The user name associated with the feedback source."""

620 

621 

class APIFeedbackSource(FeedbackSourceBase):
    """Feedback source whose `type` is fixed to `"api"`."""

    type: Literal["api"] = "api"

626 

627 

class ModelFeedbackSource(FeedbackSourceBase):
    """Feedback source whose `type` is fixed to `"model"`."""

    type: Literal["model"] = "model"

632 

633 

class FeedbackSourceType(Enum):
    """Kinds of feedback source."""

    API = "api"
    """General feedback submitted from the API."""
    MODEL = "model"
    """Model-assisted feedback."""

641 

642 

class FeedbackBase(BaseModel):
    """Feedback schema."""

    id: UUID
    """The unique ID of the feedback."""
    created_at: Optional[datetime] = None
    """The time the feedback was created."""
    modified_at: Optional[datetime] = None
    """The time the feedback was last modified."""
    run_id: Optional[UUID]
    """The associated run ID this feedback is logged for."""
    trace_id: Optional[UUID]
    """The associated trace ID this feedback is logged for."""
    key: str
    """The metric name, tag, or aspect to provide feedback on."""
    score: SCORE_TYPE = None
    """Value or score to assign the run."""
    value: VALUE_TYPE = None
    """The display value, tag or other value for the feedback if not a metric."""
    comment: Optional[str] = None
    """Comment or explanation for the feedback."""
    correction: Union[str, dict, None] = None
    """Correction for the run."""
    feedback_source: Optional[FeedbackSourceBase] = None
    """The source of the feedback."""
    session_id: Optional[UUID] = None
    """The associated project ID (Session = Project) this feedback is logged for."""
    comparative_experiment_id: Optional[UUID] = None
    """If logged within a 'comparative experiment', this is the ID of the experiment."""
    feedback_group_id: Optional[UUID] = None
    """For preference scoring, this group ID is shared across feedbacks for each
    run in the group that was being compared."""
    extra: Optional[dict] = None
    """The metadata of the feedback."""

    class Config:
        """Pydantic configuration for the schema."""

        frozen = True

683 

684 

class FeedbackCategory(TypedDict, total=False):
    """A value and label pair describing one feedback category."""

    value: float
    """The numeric value associated with this feedback category."""
    label: Optional[str]
    """An optional label to interpret the value for this feedback category."""

692 

693 

class FeedbackConfig(TypedDict, total=False):
    """Describes _how_ a feedback value ought to be interpreted."""

    type: Literal["continuous", "categorical", "freeform"]
    """The type of feedback."""
    min: Optional[float]
    """The minimum value for continuous feedback."""
    max: Optional[float]
    """The maximum value for continuous feedback."""
    categories: Optional[list[FeedbackCategory]]
    """If feedback is categorical, this defines the valid categories the
    server will accept. Not applicable to continuous or freeform feedback types."""

706 

707 

class FeedbackCreate(FeedbackBase):
    """Schema used for creating feedback; `feedback_source` is required here."""

    feedback_source: FeedbackSourceBase
    """The source of the feedback."""
    feedback_config: Optional[FeedbackConfig] = None
    """The config for the feedback."""
    error: Optional[bool] = None

716 

717 

class Feedback(FeedbackBase):
    """Schema for getting feedback; both timestamps are required here."""

    id: UUID
    created_at: datetime
    """The time the feedback was created."""
    modified_at: datetime
    """The time the feedback was last modified."""
    feedback_source: Optional[FeedbackSourceBase] = None
    """The source of the feedback, if any."""

728 

729 

class TracerSession(BaseModel):
    """TracerSession schema for the API.

    Sessions are also referred to as "Projects" in the UI.
    """

    id: UUID
    """The ID of the project."""
    start_time: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    """The time the project was created."""
    end_time: Optional[datetime] = None
    """The time the project was ended."""
    description: Optional[str] = None
    """The description of the project."""
    name: Optional[str] = None
    """The name of the session."""
    extra: Optional[dict[str, Any]] = None
    """Extra metadata for the project."""
    tenant_id: UUID
    """The tenant ID this project belongs to."""
    reference_dataset_id: Optional[UUID]
    """The reference dataset IDs this project's runs were generated on."""

    _host_url: Optional[str] = PrivateAttr(default=None)

    def __init__(self, _host_url: Optional[str] = None, **kwargs: Any) -> None:
        """Initialize a TracerSession object.

        Args:
            _host_url: Base URL used to build `url`; kept as a private attribute.
            **kwargs: Model field values forwarded to the base initializer.
        """
        super().__init__(**kwargs)
        self._host_url = _host_url
        # A naive start_time is stamped as UTC.
        if self.start_time.tzinfo is None:
            self.start_time = self.start_time.replace(tzinfo=timezone.utc)

    @property
    def url(self) -> Optional[str]:
        """URL of this project within the app, or `None` when no host URL is set."""
        if not self._host_url:
            return None
        return f"{self._host_url}/o/{self.tenant_id}/projects/p/{self.id}"

    @property
    def metadata(self) -> dict[str, Any]:
        """Retrieve the `metadata` entry of `extra`, or an empty dict if absent."""
        return (self.extra or {}).get("metadata", {})

    @property
    def tags(self) -> list[str]:
        """Retrieve the `tags` entry of `extra`, or an empty list if absent."""
        return (self.extra or {}).get("tags", [])

782 

783 

class TracerSessionResult(TracerSession):
    """A project, hydrated with additional information.

    Sessions are also referred to as "Projects" in the UI.
    """

    run_count: Optional[int]
    """The number of runs in the project."""
    latency_p50: Optional[timedelta]
    """The median (50th percentile) latency for the project."""
    latency_p99: Optional[timedelta]
    """The 99th percentile latency for the project."""
    total_tokens: Optional[int]
    """The total number of tokens consumed in the project."""
    prompt_tokens: Optional[int]
    """The total number of prompt tokens consumed in the project."""
    completion_tokens: Optional[int]
    """The total number of completion tokens consumed in the project."""
    last_run_start_time: Optional[datetime]
    """The start time of the last run in the project."""
    feedback_stats: Optional[dict[str, Any]]
    """Feedback stats for the project."""
    session_feedback_stats: Optional[dict[str, Any]]
    """Summary feedback stats for the project."""
    run_facets: Optional[list[dict[str, Any]]]
    """Facets for the runs in the project."""
    total_cost: Optional[Decimal]
    """The total estimated LLM cost associated with the completion tokens."""
    prompt_cost: Optional[Decimal]
    """The estimated cost associated with the prompt (input) tokens."""
    completion_cost: Optional[Decimal]
    """The estimated cost associated with the completion tokens."""
    first_token_p50: Optional[timedelta]
    """The median (50th percentile) time to process the first token."""
    first_token_p99: Optional[timedelta]
    """The 99th percentile time to process the first token."""
    error_rate: Optional[float]
    """The error rate for the project."""

822 

823 

@runtime_checkable
class BaseMessageLike(Protocol):
    """Structural type for objects that look like a BaseMessage.

    Being `runtime_checkable`, it can be used with `isinstance`.
    """

    content: str
    """The content of the message."""
    additional_kwargs: dict[Any, Any]
    """Additional keyword arguments associated with the message."""

    @property
    def type(self) -> str:
        """Type of the message, used for serialization."""

836 

837 

class DatasetShareSchema(TypedDict, total=False):
    """Schema describing a dataset share; every key is optional."""

    dataset_id: UUID
    """The ID of the dataset."""
    share_token: UUID
    """The token for sharing the dataset."""
    url: str
    """The URL of the shared dataset."""

847 

848 

class AnnotationQueue(BaseModel):
    """An annotation queue."""

    id: UUID
    """The unique identifier of the annotation queue."""
    name: str
    """The name of the annotation queue."""
    description: Optional[str] = None
    """An optional description of the annotation queue."""
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    """When the annotation queue was created; defaults to the current UTC time."""
    updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    """When the annotation queue was last updated; defaults to the current UTC time."""
    tenant_id: UUID
    """The ID of the tenant associated with the annotation queue."""

864 

865 

class AnnotationQueueWithDetails(AnnotationQueue):
    """An annotation queue together with its rubric instructions."""

    rubric_instructions: Optional[str] = None
    """The rubric instructions for the annotation queue."""

871 

872 

class BatchIngestConfig(TypedDict, total=False):
    """Settings for batch ingestion; every key is optional."""

    use_multipart_endpoint: bool
    """Whether to use the multipart endpoint for batch ingestion."""
    scale_up_qsize_trigger: int
    """The queue size threshold that triggers scaling up."""
    scale_up_nthreads_limit: int
    """The maximum number of threads to scale up to."""
    scale_down_nempty_trigger: int
    """The number of empty threads that triggers scaling down."""
    size_limit: int
    """The maximum size limit for the batch."""
    size_limit_bytes: Optional[int]
    """The maximum size limit in bytes for the batch."""

888 

889 

class LangSmithInfo(BaseModel):
    """Information about the LangSmith server."""

    version: str = ""
    """The version of the LangSmith server."""
    license_expiration_time: Optional[datetime] = None
    """The time the license will expire."""
    batch_ingest_config: Optional[BatchIngestConfig] = None
    """The batch ingestion configuration."""
    instance_flags: Optional[dict[str, Any]] = None
    """The instance flags."""

900 

901 

# Annotations in this module are lazy strings (`from __future__ import
# annotations`); ask pydantic to resolve the ones on Example's fields now.
Example.update_forward_refs()

903 

904 

class LangSmithSettings(BaseModel):
    """Settings for the LangSmith tenant."""

    id: str
    """The ID of the tenant."""
    display_name: str
    """The display name of the tenant."""
    created_at: datetime
    """The creation time of the tenant."""
    tenant_handle: Optional[str] = None

915 

916 

class FeedbackIngestToken(BaseModel):
    """Schema for a feedback ingest token."""

    id: UUID
    """The ID of the feedback ingest token."""
    url: str
    """The URL to GET when logging the feedback."""
    expires_at: datetime
    """The expiration time of the token."""

926 

927 

class RunEvent(TypedDict, total=False):
    """Schema for a run event; every key is optional."""

    name: str
    """Type of event."""
    time: Union[datetime, str]
    """Time of the event, as a datetime or a string."""
    kwargs: Optional[dict[str, Any]]
    """Additional metadata for the event."""

937 

938 

class TimeDeltaInput(TypedDict, total=False):
    """Schema for a timedelta given as parts; every key is optional."""

    days: int
    """Number of days."""
    hours: int
    """Number of hours."""
    minutes: int
    """Number of minutes."""

948 

949 

class DatasetDiffInfo(BaseModel):
    """Difference between two datasets, expressed as lists of example IDs."""

    examples_modified: list[UUID]
    """UUIDs of the modified examples."""
    examples_added: list[UUID]
    """UUIDs of the added examples."""
    examples_removed: list[UUID]
    """UUIDs of the removed examples."""

959 

960 

class ComparativeExperiment(BaseModel):
    """A comparative experiment.

    This information summarizes evaluation results comparing
    two or more models on a given dataset.
    """

    id: UUID
    """The unique identifier for the comparative experiment."""
    name: Optional[str] = None
    """The optional name of the comparative experiment."""
    description: Optional[str] = None
    """An optional description of the comparative experiment."""
    tenant_id: UUID
    """The identifier of the tenant associated with this experiment."""
    created_at: datetime
    """The timestamp when the comparative experiment was created."""
    modified_at: datetime
    """The timestamp when the comparative experiment was last modified."""
    reference_dataset_id: UUID
    """The identifier of the reference dataset used in this experiment."""
    extra: Optional[dict[str, Any]] = None
    """Optional additional information about the experiment."""
    experiments_info: Optional[list[dict]] = None
    """Optional list of dictionaries containing information about individual experiments."""
    feedback_stats: Optional[dict[str, Any]] = None
    """Optional dictionary containing feedback statistics for the experiment."""

    @property
    def metadata(self) -> dict[str, Any]:
        """Retrieve the `metadata` entry of `extra`, or an empty dict if absent."""
        return (self.extra or {}).get("metadata", {})

995 

996 

class PromptCommit(BaseModel):
    """Represents a Prompt with a manifest."""

    owner: str
    """The handle of the owner of the prompt."""
    repo: str
    """The name of the prompt."""
    commit_hash: str
    """The commit hash of the prompt."""
    manifest: dict[str, Any]
    """The manifest of the prompt."""
    examples: list[dict]
    """The list of examples."""

1010 

1011 

class ListedPromptCommit(BaseModel):
    """Represents a listed prompt commit with associated metadata."""

    id: UUID
    """The unique identifier for the prompt commit."""

    owner: str
    """The owner of the prompt commit."""

    repo: str
    """The repository name of the prompt commit."""

    manifest_id: Optional[UUID] = None
    """The optional identifier for the manifest associated with this commit."""

    repo_id: Optional[UUID] = None
    """The optional identifier for the repository."""

    parent_id: Optional[UUID] = None
    """The optional identifier for the parent commit."""

    commit_hash: Optional[str] = None
    """The optional hash of the commit."""

    created_at: Optional[datetime] = None
    """The optional timestamp when the commit was created."""

    updated_at: Optional[datetime] = None
    """The optional timestamp when the commit was last updated."""

    example_run_ids: Optional[list[UUID]] = Field(default_factory=list)
    """A list of example run identifiers associated with this commit.

    Defaults to a fresh empty list per instance.
    """

    num_downloads: Optional[int] = 0
    """The number of times this commit has been downloaded."""

    num_views: Optional[int] = 0
    """The number of times this commit has been viewed."""

    parent_commit_hash: Optional[str] = None
    """The optional hash of the parent commit."""

1053 

1054 

class Prompt(BaseModel):
    """Represents a Prompt with metadata."""

    repo_handle: str
    """The name of the prompt."""
    description: Optional[str] = None
    """The description of the prompt."""
    readme: Optional[str] = None
    """The README of the prompt."""
    id: str
    """The ID of the prompt."""
    tenant_id: str
    """The tenant ID of the prompt owner."""
    created_at: datetime
    """The creation time of the prompt."""
    updated_at: datetime
    """The last update time of the prompt."""
    is_public: bool
    """Whether the prompt is public."""
    is_archived: bool
    """Whether the prompt is archived."""
    tags: list[str]
    """The tags associated with the prompt."""
    original_repo_id: Optional[str] = None
    """The ID of the original prompt, if forked."""
    upstream_repo_id: Optional[str] = None
    """The ID of the upstream prompt, if forked."""
    owner: Optional[str]
    """The handle of the owner of the prompt."""
    full_name: str
    """The full name of the prompt. (owner + repo_handle)"""
    num_likes: int
    """The number of likes."""
    num_downloads: int
    """The number of downloads."""
    num_views: int
    """The number of views."""
    liked_by_auth_user: Optional[bool] = None
    """Whether the prompt is liked by the authenticated user."""
    last_commit_hash: Optional[str] = None
    """The hash of the last commit."""
    num_commits: int
    """The number of commits."""
    original_repo_full_name: Optional[str] = None
    """The full name of the original prompt, if forked."""
    upstream_repo_full_name: Optional[str] = None
    """The full name of the upstream prompt, if forked."""

1102 

1103 

class ListPromptsResponse(BaseModel):
    """A list of prompts with metadata."""

    repos: list[Prompt]
    """The list of prompts."""
    total: int
    """The total number of prompts."""

1111 

1112 

class PromptSortField(str, Enum):
    """Enum for sorting fields for prompts.

    Members are also ``str`` instances, so each compares equal to its value.
    """

    num_downloads = "num_downloads"
    """Number of downloads."""
    num_views = "num_views"
    """Number of views."""
    updated_at = "updated_at"
    """Last updated time."""
    num_likes = "num_likes"
    """Number of likes."""

1124 

1125 

class InputTokenDetails(TypedDict, total=False):
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.
    """

    audio: int
    """Audio input tokens."""
    cache_creation: int
    """Input tokens that were cached and there was a cache miss.

    Since there was a cache miss, the cache was created from these tokens.
    """
    cache_read: int
    """Input tokens that were cached and there was a cache hit.

    Since there was a cache hit, the tokens were read from the cache. More precisely,
    the model state given these tokens was read from the cache.
    """

1145 

1146 

class OutputTokenDetails(TypedDict, total=False):
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.
    """

    audio: int
    """Audio output tokens."""
    reasoning: int
    """Reasoning output tokens.

    Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
    models) that are not returned as part of model output.
    """

1161 

1162 

class InputCostDetails(TypedDict, total=False):
    """Breakdown of input token costs.

    Does *not* need to sum to full input cost. Does *not* need to have all keys.
    """

    audio: float
    """Cost of audio input tokens."""
    cache_creation: float
    """Cost of input tokens that were cached and there was a cache miss.

    Since there was a cache miss, the cache was created from these tokens.
    """
    cache_read: float
    """Cost of input tokens that were cached and there was a cache hit.

    Since there was a cache hit, the tokens were read from the cache. More precisely,
    the model state given these tokens was read from the cache.
    """

1182 

1183 

class OutputCostDetails(TypedDict, total=False):
    """Breakdown of output token costs.

    Does *not* need to sum to full output cost. Does *not* need to have all keys.
    """

    audio: float
    """Cost of audio output tokens."""
    reasoning: float
    """Cost of reasoning output tokens.

    Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
    models) that are not returned as part of model output.
    """

1198 

1199 

class UsageMetadata(TypedDict):
    """Usage metadata for a message, such as token counts.

    This is a standard representation of token usage that is consistent across models.

    The three token counts are required; every other key is marked
    ``NotRequired``.
    """

    input_tokens: int
    """Count of input (or prompt) tokens. Sum of all input token types."""
    output_tokens: int
    """Count of output (or completion) tokens. Sum of all output token types."""
    total_tokens: int
    """Total token count. Sum of input_tokens + output_tokens."""
    input_token_details: NotRequired[InputTokenDetails]
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.
    """
    output_token_details: NotRequired[OutputTokenDetails]
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.
    """
    input_cost: NotRequired[float]
    """The cost of the input tokens."""
    output_cost: NotRequired[float]
    """The cost of the output tokens."""
    total_cost: NotRequired[float]
    """The total cost of the tokens."""
    input_cost_details: NotRequired[InputCostDetails]
    """The cost details of the input tokens."""
    output_cost_details: NotRequired[OutputCostDetails]
    """The cost details of the output tokens."""

1232 

1233 

class ExtractedUsageMetadata(TypedDict, total=False):
    """Usage metadata dictionary extracted from a run.

    Should be the same as UsageMetadata, but does not require all
    keys to be present.
    """

    input_tokens: int
    """The number of tokens used for the prompt."""
    output_tokens: int
    """The number of tokens generated as output."""
    total_tokens: int
    """The total number of tokens used."""
    input_token_details: InputTokenDetails
    """The details of the input tokens."""
    output_token_details: OutputTokenDetails
    """The details of the output tokens."""
    input_cost: float
    """The cost of the input tokens."""
    output_cost: float
    """The cost of the output tokens."""
    total_cost: float
    """The total cost of the tokens."""
    input_cost_details: InputCostDetails
    """The cost details of the input tokens."""
    output_cost_details: OutputCostDetails
    """The cost details of the output tokens."""

1261 

1262 

class UpsertExamplesResponse(TypedDict):
    """Response object returned from the upsert_examples_multipart method."""

    count: int
    """The number of examples that were upserted."""
    example_ids: list[str]
    """The ids of the examples that were upserted."""

1270 

1271 

class ExampleWithRuns(Example):
    """Example with runs."""

    runs: list[Run] = Field(default_factory=list)
    """The runs of the example.

    Defaults to a fresh empty list per instance.
    """

1278 

1279 

class ExperimentRunStats(TypedDict):
    """Run statistics for an experiment.

    All keys are required, but each value may be ``None``.
    """

    run_count: Optional[int]
    """The number of runs in the project."""
    latency_p50: Optional[timedelta]
    """The median (50th percentile) latency for the project."""
    latency_p99: Optional[timedelta]
    """The 99th percentile latency for the project."""
    total_tokens: Optional[int]
    """The total number of tokens consumed in the project."""
    prompt_tokens: Optional[int]
    """The total number of prompt tokens consumed in the project."""
    completion_tokens: Optional[int]
    """The total number of completion tokens consumed in the project."""
    last_run_start_time: Optional[datetime]
    """The start time of the last run in the project."""
    run_facets: Optional[list[dict[str, Any]]]
    """Facets for the runs in the project."""
    total_cost: Optional[Decimal]
    """The total estimated LLM cost."""
    prompt_cost: Optional[Decimal]
    """The estimated cost associated with the prompt (input) tokens."""
    completion_cost: Optional[Decimal]
    """The estimated cost associated with the completion tokens."""
    first_token_p50: Optional[timedelta]
    """The median (50th percentile) time to process the first token."""
    first_token_p99: Optional[timedelta]
    """The 99th percentile time to process the first token."""
    error_rate: Optional[float]
    """The error rate for the project."""

1311 

1312 

class ExperimentResults(TypedDict):
    """Results container for experiment data with stats and examples.

    Breaking change in v0.4.32:
        The 'stats' field has been split into 'feedback_stats' and 'run_stats'.
    """

    feedback_stats: dict
    """Feedback statistics for the experiment."""
    run_stats: ExperimentRunStats
    """Run statistics (latency, token count, etc.)."""
    examples_with_runs: Iterator[ExampleWithRuns]
    """Iterator over the experiment's examples, each with its runs.

    Being an iterator, it can be consumed only once.
    """

1325 

1326 

class InsightsReport(BaseModel):
    """An Insights Report created by the Insights Agent over a tracing project."""

    id: Union[UUID, str]
    """The ID of the report; used as the ``clusterJobId`` query parameter in `link`."""
    name: str
    """The name of the report; shown in the notebook HTML representation."""
    status: str
    """The status of the report."""
    error: Optional[str] = None
    """The error recorded for the report, if any."""
    project_id: Union[UUID, str]
    """The ID of the project; used in the path of `link`."""
    host_url: str
    """The base URL that `link` is built on."""
    tenant_id: Union[UUID, str]
    """The ID of the tenant; used in the path of `link`."""

    @property
    def link(self) -> str:
        """URL to view this Insights Report in LangSmith UI."""
        return (
            f"{self.host_url}/o/{self.tenant_id}"
            f"/projects/p/{self.project_id}"
            f"?tab=4&clusterJobId={self.id}"
        )

    def _repr_html_(self) -> str:
        """Return an HTML anchor to `link`, for rich display in notebooks.

        The URL and the report name are HTML-escaped so that quotes, ``&`` or
        angle brackets in either value cannot break out of the markup.
        """
        # Local import: only this method needs ``html``.
        import html

        href = html.escape(self.link, quote=True)
        label = html.escape(f"InsightsReport('{self.name}')", quote=True)
        # Attributes are separated by whitespace only; the original emitted a
        # stray comma after ``href``.
        return f'<a href="{href}" target="_blank" rel="noopener">{label}</a>'

1345 

1346 

class FeedbackFormulaWeightedVariable(BaseModel):
    """A feedback key and weight used when calculating feedback formulas."""

    # Only the literal value "weighted_key" is accepted.
    part_type: Literal["weighted_key"]
    # Weight paired with ``key``.
    weight: float
    # Feedback key; must be a non-empty string (min_length=1).
    key: Annotated[str, Field(min_length=1)]

1353 

1354 

class FeedbackFormulaCreate(BaseModel):
    """Schema used for creating a feedback formula."""

    # Both IDs are optional and default to None.
    dataset_id: Optional[UUID] = None
    session_id: Optional[UUID] = None
    feedback_key: str
    # Restricted to "sum" or "avg".
    aggregation_type: Literal["sum", "avg"]
    # Required; must hold between 1 and 50 parts.
    formula_parts: list[FeedbackFormulaWeightedVariable] = Field(
        ..., min_items=1, max_items=50
    )

1365 

1366 

class FeedbackFormulaUpdate(BaseModel):
    """Schema used for updating a feedback formula."""

    feedback_key: str
    # Restricted to "sum" or "avg".
    aggregation_type: Literal["sum", "avg"]
    # Required; must hold between 1 and 50 parts.
    formula_parts: list[FeedbackFormulaWeightedVariable] = Field(
        ..., min_items=1, max_items=50
    )

1375 

1376 

class FeedbackFormula(FeedbackFormulaCreate):
    """Schema for getting feedback formulas.

    Extends `FeedbackFormulaCreate` with an ID and two timestamps.
    """

    id: UUID
    created_at: datetime
    modified_at: datetime