Coverage for langsmith/_internal/_serde.py: 23%
83 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-12-11 16:15 -0800
« prev ^ index » next coverage.py v7.10.1, created at 2025-12-11 16:15 -0800
1from __future__ import annotations
3import base64
4import collections
5import datetime
6import decimal
7import ipaddress
8import json
9import logging
10import pathlib
11import re
12import uuid
13from typing import Any
15from langsmith._internal import _orjson
try:
    from zoneinfo import ZoneInfo  # type: ignore[import-not-found]
except ImportError:
    # ``zoneinfo`` could not be imported; define a stand-in class so the name
    # ``ZoneInfo`` always exists and ``isinstance(obj, ZoneInfo)`` checks in
    # this module stay valid (they simply never match).

    class ZoneInfo:  # type: ignore[no-redef]
        """Introduced in python 3.9."""


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _simple_default(obj):
    """Convert an object orjson cannot serialize natively into a JSON-friendly value.

    Parameters
    ----------
    obj : Any
        The object to convert.

    Returns
    -------
    Any
        * ``datetime.datetime`` -> ISO 8601 string
        * ``uuid.UUID``, IP addresses/interfaces/networks, ``pathlib.Path``
          -> ``str(obj)``
        * ``BaseException`` -> ``{"error": <class name>, "message": str(obj)}``
        * ``set`` / ``frozenset`` / ``collections.deque`` -> ``list``
        * ``datetime.timezone`` / ``ZoneInfo`` -> the zone name, falling back
          to ``str(obj)`` when ``tzname(None)`` yields ``None``
        * ``datetime.timedelta`` -> total seconds
        * ``decimal.Decimal`` -> ``int`` when the exponent is non-negative,
          otherwise ``float``
        * ``re.Pattern`` -> the pattern source
        * ``bytes`` / ``bytearray`` -> base64-encoded text
        * anything else, or any failure during conversion -> ``str(obj)``
    """
    try:
        # Only need to handle types that orjson doesn't serialize by default
        # https://github.com/ijl/orjson#serialize
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        elif isinstance(obj, uuid.UUID):
            return str(obj)
        elif isinstance(obj, BaseException):
            return {"error": type(obj).__name__, "message": str(obj)}
        elif isinstance(obj, (set, frozenset, collections.deque)):
            return list(obj)
        elif isinstance(obj, (datetime.timezone, ZoneInfo)):
            zone_name = obj.tzname(None)
            # ``ZoneInfo.tzname(None)`` may return None (zones whose offset
            # depends on the date); fall back to ``str(obj)`` so the zone is
            # not silently serialized as JSON ``null``.
            return zone_name if zone_name is not None else str(obj)
        elif isinstance(obj, datetime.timedelta):
            return obj.total_seconds()
        elif isinstance(obj, decimal.Decimal):
            # Non-finite decimals (NaN/Infinity) carry a non-integer exponent,
            # so this comparison raises and they end up in the ``str``
            # fallback of the ``except`` clause below.
            if obj.as_tuple().exponent >= 0:
                return int(obj)
            else:
                return float(obj)
        elif isinstance(
            obj,
            (
                ipaddress.IPv4Address,
                ipaddress.IPv4Interface,
                ipaddress.IPv4Network,
                ipaddress.IPv6Address,
                ipaddress.IPv6Interface,
                ipaddress.IPv6Network,
                pathlib.Path,
            ),
        ):
            return str(obj)
        elif isinstance(obj, re.Pattern):
            return obj.pattern
        elif isinstance(obj, (bytes, bytearray)):
            return base64.b64encode(obj).decode()
        return str(obj)
    except BaseException as e:
        # Best effort: never let a conversion failure escape the serializer.
        logger.debug("Failed to serialize %s to JSON: %s", type(obj), e)
        return str(obj)
# Ordered ``(method name, keyword arguments)`` pairs that are tried, in turn,
# on objects exposing a matching callable attribute.
_serialization_methods = [
    # Pydantic V2 with non-serializable fields
    ("model_dump", {"exclude_none": True, "mode": "json"}),
    # Pydantic V1 with non-serializable field
    ("dict", {}),
    # dataclasses-json
    ("to_dict", {}),
]
# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization,
#            in order to handle serializing these tricky Python types *from Rust*.
#            Do not cause this function to become inaccessible (e.g. by deleting
#            or renaming it) without also fixing the corresponding Rust code found in:
#            rust/crates/langsmith-pyo3/src/serialization/mod.rs
def _serialize_json(obj: Any) -> Any:
    """Fallback converter for objects the JSON encoder cannot handle directly.

    Sets and tuples become lists (named tuples become dicts via ``_asdict``).
    Otherwise each method listed in ``_serialization_methods`` is tried on
    the instance; the first call that succeeds wins, and a non-dict result
    is stringified. If none applies, ``_simple_default`` does the conversion.
    Any failure along the way yields ``str(obj)``.
    """
    try:
        if isinstance(obj, (set, tuple)):
            # NamedTuple instances expose a callable ``_asdict``.
            is_namedtuple = hasattr(obj, "_asdict") and callable(obj._asdict)
            return obj._asdict() if is_namedtuple else list(obj)

        for method_name, call_kwargs in _serialization_methods:
            if not hasattr(obj, method_name):
                continue
            if not callable(getattr(obj, method_name)):
                continue
            if isinstance(obj, type):
                # Classes themselves are skipped; only instances are dumped.
                continue
            try:
                dumped = getattr(obj, method_name)(**call_kwargs)
                return dumped if isinstance(dumped, dict) else str(dumped)
            except Exception as exc:
                # This method failed; move on to the next candidate.
                logger.debug(
                    f"Failed to use {method_name} to serialize {type(obj)} to"
                    f" JSON: {repr(exc)}"
                )
        return _simple_default(obj)
    except BaseException as exc:
        logger.debug(f"Failed to serialize {type(obj)} to JSON: {exc}")
        return str(obj)
# Matches, in priority order:
#   group 1 - an escaped backslash (``\\``) or a valid high+low surrogate pair,
#             both of which must be preserved verbatim;
#   no group - a lone surrogate escape, which is what gets removed.
# Consuming ``\\`` first prevents its second backslash from being mistaken for
# the start of a ``\udXXX`` escape.
_LONE_SURROGATE_RE = re.compile(
    rb"(\\\\|\\ud[89ab][0-9a-f]{2}\\ud[c-f][0-9a-f]{2})|\\ud[89a-f][0-9a-f]{2}",
    re.IGNORECASE,
)


def _elide_surrogates(s: bytes) -> bytes:
    r"""Remove lone ``\udXXX`` surrogate escapes from ASCII-escaped JSON bytes.

    Parameters
    ----------
    s : bytes
        JSON text in which non-ASCII characters appear as ``\uXXXX`` escapes.

    Returns
    -------
    bytes
        ``s`` with every unpaired surrogate escape removed. Well-formed
        high/low surrogate pairs (which encode characters outside the BMP)
        and escaped backslashes are left untouched.
    """
    return _LONE_SURROGATE_RE.sub(lambda m: m.group(1) or b"", s)
def dumps_json(obj: Any) -> bytes:
    """Serialize an object to JSON-encoded bytes.

    orjson is tried first, with numpy, dataclass, UUID and non-string-key
    support enabled and ``_serialize_json`` as the fallback converter. If
    that raises ``TypeError``, the object is dumped with the standard
    ``json`` module (ASCII-escaped) and re-encoded through orjson; when
    orjson cannot parse that output, it is passed through
    ``_elide_surrogates`` instead.

    Parameters
    ----------
    obj : Any
        The object to serialize.

    Returns
    -------
    bytes
        The JSON document.
    """
    orjson_options = (
        _orjson.OPT_SERIALIZE_NUMPY
        | _orjson.OPT_SERIALIZE_DATACLASS
        | _orjson.OPT_SERIALIZE_UUID
        | _orjson.OPT_NON_STR_KEYS
    )
    try:
        return _orjson.dumps(obj, default=_serialize_json, option=orjson_options)
    except TypeError as e:
        # Usually caused by UTF surrogate characters
        logger.debug(f"Orjson serialization failed: {repr(e)}. Falling back to json.")
        ascii_text = json.dumps(obj, default=_serialize_json, ensure_ascii=True)
        fallback = ascii_text.encode("utf-8")
        try:
            reparsed = _orjson.loads(fallback.decode("utf-8", errors="surrogateescape"))
            fallback = _orjson.dumps(reparsed)
        except _orjson.JSONDecodeError:
            fallback = _elide_surrogates(fallback)
        return fallback