Coverage for langsmith/_internal/_serde.py: 23%

83 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-12-11 16:15 -0800

1from __future__ import annotations 

2 

3import base64 

4import collections 

5import datetime 

6import decimal 

7import ipaddress 

8import json 

9import logging 

10import pathlib 

11import re 

12import uuid 

13from typing import Any 

14 

15from langsmith._internal import _orjson 

16 

# ``zoneinfo`` is part of the standard library from Python 3.9 onwards. When the
# import fails, an empty placeholder class keeps the ``ZoneInfo`` name defined
# so the ``isinstance`` check in ``_simple_default`` still works.
try:
    from zoneinfo import ZoneInfo  # type: ignore[import-not-found]
except ImportError:

    class ZoneInfo:  # type: ignore[no-redef]
        """Introduced in python 3.9."""


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

26 

27 

def _simple_default(obj):
    """Convert ``obj`` into a value the JSON encoders can serialize.

    Only types that orjson does not serialize by default are handled
    explicitly (https://github.com/ijl/orjson#serialize); anything else is
    rendered with ``str(obj)``.

    Parameters
    ----------
    obj : Any
        The object to convert.

    Returns:
    -------
    Any
        A ``str``, number, ``list`` or ``dict`` standing in for ``obj``.
    """
    try:
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        if isinstance(obj, uuid.UUID):
            return str(obj)
        if isinstance(obj, BaseException):
            return {"error": type(obj).__name__, "message": str(obj)}
        if isinstance(obj, (set, frozenset, collections.deque)):
            return list(obj)
        if isinstance(obj, (datetime.timezone, ZoneInfo)):
            # ZoneInfo.tzname(None) returns None for zones that do not have a
            # single fixed offset; fall back to str(obj) (the zone key) so the
            # zone is not silently serialized as null.
            tz_name = obj.tzname(None)
            return tz_name if tz_name is not None else str(obj)
        if isinstance(obj, datetime.timedelta):
            return obj.total_seconds()
        if isinstance(obj, decimal.Decimal):
            if not obj.is_finite():
                # NaN / Infinity have a non-numeric exponent marker, so the
                # comparison below would raise; emit their string form.
                return str(obj)
            if obj.as_tuple().exponent >= 0:
                return int(obj)
            return float(obj)
        if isinstance(
            obj,
            (
                ipaddress.IPv4Address,
                ipaddress.IPv4Interface,
                ipaddress.IPv4Network,
                ipaddress.IPv6Address,
                ipaddress.IPv6Interface,
                ipaddress.IPv6Network,
                pathlib.Path,
            ),
        ):
            return str(obj)
        if isinstance(obj, re.Pattern):
            return obj.pattern
        if isinstance(obj, (bytes, bytearray)):
            return base64.b64encode(obj).decode()
        return str(obj)
    except BaseException as e:
        # Best effort: any failure above degrades to the string form.
        logger.debug("Failed to serialize %s to JSON: %s", type(obj), e)
        return str(obj)

70 

71 

# (method name, keyword arguments) pairs that ``_serialize_json`` tries, in
# this order, on objects exposing a callable attribute of that name.
_serialization_methods = [
    (
        "model_dump",
        {"exclude_none": True, "mode": "json"},
    ),  # Pydantic V2 with non-serializable fields
    ("dict", {}),  # Pydantic V1 with non-serializable field
    ("to_dict", {}),  # dataclasses-json
]

80 

81 

# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization,
#            in order to handle serializing these tricky Python types *from Rust*.
#            Do not cause this function to become inaccessible (e.g. by deleting
#            or renaming it) without also fixing the corresponding Rust code found in:
#            rust/crates/langsmith-pyo3/src/serialization/mod.rs
def _serialize_json(obj: Any) -> Any:
    """Turn ``obj`` into something JSON-serializable.

    Sets and tuples become lists (named tuples become dicts via ``_asdict``).
    Otherwise each method listed in ``_serialization_methods`` is tried in
    turn, and ``_simple_default`` handles whatever is left. Any unexpected
    failure falls back to ``str(obj)``.

    Parameters
    ----------
    obj : Any
        The object to convert.

    Returns:
    -------
    Any
        A JSON-compatible replacement for ``obj``.
    """
    try:
        if isinstance(obj, (set, tuple)):
            # NamedTuple
            has_asdict = hasattr(obj, "_asdict") and callable(obj._asdict)
            return obj._asdict() if has_asdict else list(obj)

        for attr, call_kwargs in _serialization_methods:
            applicable = (
                hasattr(obj, attr)
                and callable(getattr(obj, attr))
                and not isinstance(obj, type)
            )
            if not applicable:
                continue
            try:
                converted = getattr(obj, attr)(**call_kwargs)
                # Non-dict results are stringified rather than returned as-is.
                return converted if isinstance(converted, dict) else str(converted)
            except Exception as e:
                # This method failed; move on to the next candidate.
                logger.debug(
                    f"Failed to use {attr} to serialize {type(obj)} to"
                    f" JSON: {repr(e)}"
                )
        return _simple_default(obj)
    except BaseException as e:
        logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}")
        return str(obj)

117 

118 

# Alternatives are tried left to right at each position:
#   group 1 - an escaped backslash, or a well-formed high+low surrogate pair
#             (both are kept verbatim);
#   otherwise - a lone ``\udXXX`` surrogate escape (removed).
# Consuming the escaped backslash first prevents ``\\ud800`` (a literal
# backslash followed by the text "ud800") from being mistaken for an escape.
_SURROGATE_ESCAPE_RE = re.compile(
    rb"(\\\\|\\ud[89ab][0-9a-f]{2}\\ud[c-f][0-9a-f]{2})|\\ud[89a-f][0-9a-f]{2}",
    re.IGNORECASE,
)


def _elide_surrogates(s: bytes) -> bytes:
    """Remove unpaired ``\\udXXX`` surrogate escapes from JSON-encoded bytes.

    Well-formed surrogate pairs (which encode characters outside the Basic
    Multilingual Plane) and escaped backslashes are left untouched, so only
    the escapes that make the document invalid are dropped.

    Parameters
    ----------
    s : bytes
        ASCII JSON text that may contain ``\\uXXXX`` escapes.

    Returns:
    -------
    bytes
        ``s`` with every lone surrogate escape removed.
    """
    return _SURROGATE_ESCAPE_RE.sub(lambda m: m.group(1) or b"", s)

123 

124 

def dumps_json(obj: Any) -> bytes:
    """Serialize an object to JSON-formatted bytes.

    orjson is tried first, with ``_serialize_json`` as the ``default`` hook.
    If it raises ``TypeError``, the standard ``json`` module is used with
    ``ensure_ascii=True``; that output is then round-tripped through orjson,
    or passed through ``_elide_surrogates`` when orjson cannot parse it.

    Parameters
    ----------
    obj : Any
        The object to serialize.

    Returns:
    -------
    bytes
        The JSON document.
    """
    orjson_options = (
        _orjson.OPT_SERIALIZE_NUMPY
        | _orjson.OPT_SERIALIZE_DATACLASS
        | _orjson.OPT_SERIALIZE_UUID
        | _orjson.OPT_NON_STR_KEYS
    )
    try:
        return _orjson.dumps(obj, default=_serialize_json, option=orjson_options)
    except TypeError as e:
        # Usually caused by UTF surrogate characters
        logger.debug(f"Orjson serialization failed: {repr(e)}. Falling back to json.")

    ascii_text = json.dumps(obj, default=_serialize_json, ensure_ascii=True)
    fallback = ascii_text.encode("utf-8")
    try:
        reparsed = _orjson.loads(fallback.decode("utf-8", errors="surrogateescape"))
        return _orjson.dumps(reparsed)
    except _orjson.JSONDecodeError:
        # orjson rejected the escaped text; strip the surrogate escapes instead.
        return _elide_surrogates(fallback)