sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E, Lit 16 from sqlglot.dialects.dialect import Dialect, DialectType 17 18 T = t.TypeVar("T") 19 20logger = logging.getLogger("sqlglot") 21 22OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 23 24 25def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 26 if len(args) == 1 and args[0].is_star: 27 return exp.StarMap(this=args[0]) 28 29 keys = [] 30 values = [] 31 for i in range(0, len(args), 2): 32 keys.append(args[i]) 33 values.append(args[i + 1]) 34 35 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 36 37 38def build_like(args: t.List) -> exp.Escape | exp.Like: 39 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 40 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 41 42 43def binary_range_parser( 44 expr_type: t.Type[exp.Expression], reverse_args: bool = False 45) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 46 def _parse_binary_range( 47 self: Parser, this: t.Optional[exp.Expression] 48 ) -> t.Optional[exp.Expression]: 49 expression = self._parse_bitwise() 50 if reverse_args: 51 this, expression = expression, this 52 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 53 54 return _parse_binary_range 55 56 57def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 58 # Default argument order is base, expression 59 this = seq_get(args, 0) 60 expression = seq_get(args, 1) 61 
62 if expression: 63 if not dialect.LOG_BASE_FIRST: 64 this, expression = expression, this 65 return exp.Log(this=this, expression=expression) 66 67 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 68 69 70def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 71 arg = seq_get(args, 0) 72 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 73 74 75def build_lower(args: t.List) -> exp.Lower | exp.Hex: 76 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 79 80 81def build_upper(args: t.List) -> exp.Upper | exp.Hex: 82 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 85 86 87def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 88 def _builder(args: t.List, dialect: Dialect) -> E: 89 expression = expr_type( 90 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 91 ) 92 if len(args) > 2 and expr_type is exp.JSONExtract: 93 expression.set("expressions", args[2:]) 94 95 return expression 96 97 return _builder 98 99 100def build_mod(args: t.List) -> exp.Mod: 101 this = seq_get(args, 0) 102 expression = seq_get(args, 1) 103 104 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 105 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 106 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 107 108 return exp.Mod(this=this, expression=expression) 109 110 111def build_pad(args: t.List, is_left: bool = True): 112 return exp.Pad( 113 this=seq_get(args, 0), 114 expression=seq_get(args, 1), 115 fill_pattern=seq_get(args, 2), 116 is_left=is_left, 117 ) 118 119 120class _Parser(type): 121 def __new__(cls, clsname, bases, attrs): 122 klass = super().__new__(cls, clsname, bases, attrs) 123 124 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 125 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 126 127 return klass 128 129 130class Parser(metaclass=_Parser): 131 """ 132 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 133 134 Args: 135 error_level: The desired error level. 136 Default: ErrorLevel.IMMEDIATE 137 error_message_context: The amount of context to capture from a query string when displaying 138 the error message (in number of characters). 139 Default: 100 140 max_errors: Maximum number of error messages to include in a raised ParseError. 141 This is only relevant if error_level is ErrorLevel.RAISE. 
142 Default: 3 143 """ 144 145 FUNCTIONS: t.Dict[str, t.Callable] = { 146 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 147 "CONCAT": lambda args, dialect: exp.Concat( 148 expressions=args, 149 safe=not dialect.STRICT_STRING_CONCAT, 150 coalesce=dialect.CONCAT_COALESCE, 151 ), 152 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 153 expressions=args, 154 safe=not dialect.STRICT_STRING_CONCAT, 155 coalesce=dialect.CONCAT_COALESCE, 156 ), 157 "DATE_TO_DATE_STR": lambda args: exp.Cast( 158 this=seq_get(args, 0), 159 to=exp.DataType(this=exp.DataType.Type.TEXT), 160 ), 161 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 162 "HEX": build_hex, 163 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 164 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 165 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 166 "LIKE": build_like, 167 "LOG": build_logarithm, 168 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 169 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 170 "LOWER": build_lower, 171 "LPAD": lambda args: build_pad(args), 172 "LEFTPAD": lambda args: build_pad(args), 173 "MOD": build_mod, 174 "RPAD": lambda args: build_pad(args, is_left=False), 175 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 176 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 177 if len(args) != 2 178 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 179 "TIME_TO_TIME_STR": lambda args: exp.Cast( 180 this=seq_get(args, 0), 181 to=exp.DataType(this=exp.DataType.Type.TEXT), 182 ), 183 "TO_HEX": build_hex, 184 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 185 this=exp.Cast( 186 this=seq_get(args, 0), 187 to=exp.DataType(this=exp.DataType.Type.TEXT), 188 ), 189 start=exp.Literal.number(1), 190 
length=exp.Literal.number(10), 191 ), 192 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 193 "UPPER": build_upper, 194 "VAR_MAP": build_var_map, 195 } 196 197 NO_PAREN_FUNCTIONS = { 198 TokenType.CURRENT_DATE: exp.CurrentDate, 199 TokenType.CURRENT_DATETIME: exp.CurrentDate, 200 TokenType.CURRENT_TIME: exp.CurrentTime, 201 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 202 TokenType.CURRENT_USER: exp.CurrentUser, 203 } 204 205 STRUCT_TYPE_TOKENS = { 206 TokenType.NESTED, 207 TokenType.OBJECT, 208 TokenType.STRUCT, 209 } 210 211 NESTED_TYPE_TOKENS = { 212 TokenType.ARRAY, 213 TokenType.LIST, 214 TokenType.LOWCARDINALITY, 215 TokenType.MAP, 216 TokenType.NULLABLE, 217 *STRUCT_TYPE_TOKENS, 218 } 219 220 ENUM_TYPE_TOKENS = { 221 TokenType.ENUM, 222 TokenType.ENUM8, 223 TokenType.ENUM16, 224 } 225 226 AGGREGATE_TYPE_TOKENS = { 227 TokenType.AGGREGATEFUNCTION, 228 TokenType.SIMPLEAGGREGATEFUNCTION, 229 } 230 231 TYPE_TOKENS = { 232 TokenType.BIT, 233 TokenType.BOOLEAN, 234 TokenType.TINYINT, 235 TokenType.UTINYINT, 236 TokenType.SMALLINT, 237 TokenType.USMALLINT, 238 TokenType.INT, 239 TokenType.UINT, 240 TokenType.BIGINT, 241 TokenType.UBIGINT, 242 TokenType.INT128, 243 TokenType.UINT128, 244 TokenType.INT256, 245 TokenType.UINT256, 246 TokenType.MEDIUMINT, 247 TokenType.UMEDIUMINT, 248 TokenType.FIXEDSTRING, 249 TokenType.FLOAT, 250 TokenType.DOUBLE, 251 TokenType.CHAR, 252 TokenType.NCHAR, 253 TokenType.VARCHAR, 254 TokenType.NVARCHAR, 255 TokenType.BPCHAR, 256 TokenType.TEXT, 257 TokenType.MEDIUMTEXT, 258 TokenType.LONGTEXT, 259 TokenType.MEDIUMBLOB, 260 TokenType.LONGBLOB, 261 TokenType.BINARY, 262 TokenType.VARBINARY, 263 TokenType.JSON, 264 TokenType.JSONB, 265 TokenType.INTERVAL, 266 TokenType.TINYBLOB, 267 TokenType.TINYTEXT, 268 TokenType.TIME, 269 TokenType.TIMETZ, 270 TokenType.TIMESTAMP, 271 TokenType.TIMESTAMP_S, 272 TokenType.TIMESTAMP_MS, 273 TokenType.TIMESTAMP_NS, 274 TokenType.TIMESTAMPTZ, 275 TokenType.TIMESTAMPLTZ, 
276 TokenType.TIMESTAMPNTZ, 277 TokenType.DATETIME, 278 TokenType.DATETIME64, 279 TokenType.DATE, 280 TokenType.DATE32, 281 TokenType.INT4RANGE, 282 TokenType.INT4MULTIRANGE, 283 TokenType.INT8RANGE, 284 TokenType.INT8MULTIRANGE, 285 TokenType.NUMRANGE, 286 TokenType.NUMMULTIRANGE, 287 TokenType.TSRANGE, 288 TokenType.TSMULTIRANGE, 289 TokenType.TSTZRANGE, 290 TokenType.TSTZMULTIRANGE, 291 TokenType.DATERANGE, 292 TokenType.DATEMULTIRANGE, 293 TokenType.DECIMAL, 294 TokenType.UDECIMAL, 295 TokenType.BIGDECIMAL, 296 TokenType.UUID, 297 TokenType.GEOGRAPHY, 298 TokenType.GEOMETRY, 299 TokenType.HLLSKETCH, 300 TokenType.HSTORE, 301 TokenType.PSEUDO_TYPE, 302 TokenType.SUPER, 303 TokenType.SERIAL, 304 TokenType.SMALLSERIAL, 305 TokenType.BIGSERIAL, 306 TokenType.XML, 307 TokenType.YEAR, 308 TokenType.UNIQUEIDENTIFIER, 309 TokenType.USERDEFINED, 310 TokenType.MONEY, 311 TokenType.SMALLMONEY, 312 TokenType.ROWVERSION, 313 TokenType.IMAGE, 314 TokenType.VARIANT, 315 TokenType.VECTOR, 316 TokenType.OBJECT, 317 TokenType.OBJECT_IDENTIFIER, 318 TokenType.INET, 319 TokenType.IPADDRESS, 320 TokenType.IPPREFIX, 321 TokenType.IPV4, 322 TokenType.IPV6, 323 TokenType.UNKNOWN, 324 TokenType.NULL, 325 TokenType.NAME, 326 TokenType.TDIGEST, 327 *ENUM_TYPE_TOKENS, 328 *NESTED_TYPE_TOKENS, 329 *AGGREGATE_TYPE_TOKENS, 330 } 331 332 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 333 TokenType.BIGINT: TokenType.UBIGINT, 334 TokenType.INT: TokenType.UINT, 335 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 336 TokenType.SMALLINT: TokenType.USMALLINT, 337 TokenType.TINYINT: TokenType.UTINYINT, 338 TokenType.DECIMAL: TokenType.UDECIMAL, 339 } 340 341 SUBQUERY_PREDICATES = { 342 TokenType.ANY: exp.Any, 343 TokenType.ALL: exp.All, 344 TokenType.EXISTS: exp.Exists, 345 TokenType.SOME: exp.Any, 346 } 347 348 RESERVED_TOKENS = { 349 *Tokenizer.SINGLE_TOKENS.values(), 350 TokenType.SELECT, 351 } - {TokenType.IDENTIFIER} 352 353 DB_CREATABLES = { 354 TokenType.DATABASE, 355 TokenType.DICTIONARY, 356 TokenType.MODEL, 
357 TokenType.SCHEMA, 358 TokenType.SEQUENCE, 359 TokenType.STORAGE_INTEGRATION, 360 TokenType.TABLE, 361 TokenType.TAG, 362 TokenType.VIEW, 363 TokenType.WAREHOUSE, 364 TokenType.STREAMLIT, 365 } 366 367 CREATABLES = { 368 TokenType.COLUMN, 369 TokenType.CONSTRAINT, 370 TokenType.FOREIGN_KEY, 371 TokenType.FUNCTION, 372 TokenType.INDEX, 373 TokenType.PROCEDURE, 374 *DB_CREATABLES, 375 } 376 377 # Tokens that can represent identifiers 378 ID_VAR_TOKENS = { 379 TokenType.VAR, 380 TokenType.ANTI, 381 TokenType.APPLY, 382 TokenType.ASC, 383 TokenType.ASOF, 384 TokenType.AUTO_INCREMENT, 385 TokenType.BEGIN, 386 TokenType.BPCHAR, 387 TokenType.CACHE, 388 TokenType.CASE, 389 TokenType.COLLATE, 390 TokenType.COMMAND, 391 TokenType.COMMENT, 392 TokenType.COMMIT, 393 TokenType.CONSTRAINT, 394 TokenType.COPY, 395 TokenType.DEFAULT, 396 TokenType.DELETE, 397 TokenType.DESC, 398 TokenType.DESCRIBE, 399 TokenType.DICTIONARY, 400 TokenType.DIV, 401 TokenType.END, 402 TokenType.EXECUTE, 403 TokenType.ESCAPE, 404 TokenType.FALSE, 405 TokenType.FIRST, 406 TokenType.FILTER, 407 TokenType.FINAL, 408 TokenType.FORMAT, 409 TokenType.FULL, 410 TokenType.IDENTIFIER, 411 TokenType.IS, 412 TokenType.ISNULL, 413 TokenType.INTERVAL, 414 TokenType.KEEP, 415 TokenType.KILL, 416 TokenType.LEFT, 417 TokenType.LOAD, 418 TokenType.MERGE, 419 TokenType.NATURAL, 420 TokenType.NEXT, 421 TokenType.OFFSET, 422 TokenType.OPERATOR, 423 TokenType.ORDINALITY, 424 TokenType.OVERLAPS, 425 TokenType.OVERWRITE, 426 TokenType.PARTITION, 427 TokenType.PERCENT, 428 TokenType.PIVOT, 429 TokenType.PRAGMA, 430 TokenType.RANGE, 431 TokenType.RECURSIVE, 432 TokenType.REFERENCES, 433 TokenType.REFRESH, 434 TokenType.REPLACE, 435 TokenType.RIGHT, 436 TokenType.ROLLUP, 437 TokenType.ROW, 438 TokenType.ROWS, 439 TokenType.SEMI, 440 TokenType.SET, 441 TokenType.SETTINGS, 442 TokenType.SHOW, 443 TokenType.TEMPORARY, 444 TokenType.TOP, 445 TokenType.TRUE, 446 TokenType.TRUNCATE, 447 TokenType.UNIQUE, 448 TokenType.UNNEST, 
449 TokenType.UNPIVOT, 450 TokenType.UPDATE, 451 TokenType.USE, 452 TokenType.VOLATILE, 453 TokenType.WINDOW, 454 *CREATABLES, 455 *SUBQUERY_PREDICATES, 456 *TYPE_TOKENS, 457 *NO_PAREN_FUNCTIONS, 458 } 459 460 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 461 462 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 463 TokenType.ANTI, 464 TokenType.APPLY, 465 TokenType.ASOF, 466 TokenType.FULL, 467 TokenType.LEFT, 468 TokenType.LOCK, 469 TokenType.NATURAL, 470 TokenType.OFFSET, 471 TokenType.RIGHT, 472 TokenType.SEMI, 473 TokenType.WINDOW, 474 } 475 476 ALIAS_TOKENS = ID_VAR_TOKENS 477 478 ARRAY_CONSTRUCTORS = { 479 "ARRAY": exp.Array, 480 "LIST": exp.List, 481 } 482 483 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 484 485 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 486 487 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 488 489 FUNC_TOKENS = { 490 TokenType.COLLATE, 491 TokenType.COMMAND, 492 TokenType.CURRENT_DATE, 493 TokenType.CURRENT_DATETIME, 494 TokenType.CURRENT_TIMESTAMP, 495 TokenType.CURRENT_TIME, 496 TokenType.CURRENT_USER, 497 TokenType.FILTER, 498 TokenType.FIRST, 499 TokenType.FORMAT, 500 TokenType.GLOB, 501 TokenType.IDENTIFIER, 502 TokenType.INDEX, 503 TokenType.ISNULL, 504 TokenType.ILIKE, 505 TokenType.INSERT, 506 TokenType.LIKE, 507 TokenType.MERGE, 508 TokenType.OFFSET, 509 TokenType.PRIMARY_KEY, 510 TokenType.RANGE, 511 TokenType.REPLACE, 512 TokenType.RLIKE, 513 TokenType.ROW, 514 TokenType.UNNEST, 515 TokenType.VAR, 516 TokenType.LEFT, 517 TokenType.RIGHT, 518 TokenType.SEQUENCE, 519 TokenType.DATE, 520 TokenType.DATETIME, 521 TokenType.TABLE, 522 TokenType.TIMESTAMP, 523 TokenType.TIMESTAMPTZ, 524 TokenType.TRUNCATE, 525 TokenType.WINDOW, 526 TokenType.XOR, 527 *TYPE_TOKENS, 528 *SUBQUERY_PREDICATES, 529 } 530 531 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 532 TokenType.AND: exp.And, 533 } 534 535 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 536 TokenType.COLON_EQ: exp.PropertyEQ, 537 } 538 
539 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 540 TokenType.OR: exp.Or, 541 } 542 543 EQUALITY = { 544 TokenType.EQ: exp.EQ, 545 TokenType.NEQ: exp.NEQ, 546 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 547 } 548 549 COMPARISON = { 550 TokenType.GT: exp.GT, 551 TokenType.GTE: exp.GTE, 552 TokenType.LT: exp.LT, 553 TokenType.LTE: exp.LTE, 554 } 555 556 BITWISE = { 557 TokenType.AMP: exp.BitwiseAnd, 558 TokenType.CARET: exp.BitwiseXor, 559 TokenType.PIPE: exp.BitwiseOr, 560 } 561 562 TERM = { 563 TokenType.DASH: exp.Sub, 564 TokenType.PLUS: exp.Add, 565 TokenType.MOD: exp.Mod, 566 TokenType.COLLATE: exp.Collate, 567 } 568 569 FACTOR = { 570 TokenType.DIV: exp.IntDiv, 571 TokenType.LR_ARROW: exp.Distance, 572 TokenType.SLASH: exp.Div, 573 TokenType.STAR: exp.Mul, 574 } 575 576 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 577 578 TIMES = { 579 TokenType.TIME, 580 TokenType.TIMETZ, 581 } 582 583 TIMESTAMPS = { 584 TokenType.TIMESTAMP, 585 TokenType.TIMESTAMPTZ, 586 TokenType.TIMESTAMPLTZ, 587 *TIMES, 588 } 589 590 SET_OPERATIONS = { 591 TokenType.UNION, 592 TokenType.INTERSECT, 593 TokenType.EXCEPT, 594 } 595 596 JOIN_METHODS = { 597 TokenType.ASOF, 598 TokenType.NATURAL, 599 TokenType.POSITIONAL, 600 } 601 602 JOIN_SIDES = { 603 TokenType.LEFT, 604 TokenType.RIGHT, 605 TokenType.FULL, 606 } 607 608 JOIN_KINDS = { 609 TokenType.ANTI, 610 TokenType.CROSS, 611 TokenType.INNER, 612 TokenType.OUTER, 613 TokenType.SEMI, 614 TokenType.STRAIGHT_JOIN, 615 } 616 617 JOIN_HINTS: t.Set[str] = set() 618 619 LAMBDAS = { 620 TokenType.ARROW: lambda self, expressions: self.expression( 621 exp.Lambda, 622 this=self._replace_lambda( 623 self._parse_assignment(), 624 expressions, 625 ), 626 expressions=expressions, 627 ), 628 TokenType.FARROW: lambda self, expressions: self.expression( 629 exp.Kwarg, 630 this=exp.var(expressions[0].name), 631 expression=self._parse_assignment(), 632 ), 633 } 634 635 COLUMN_OPERATORS = { 636 TokenType.DOT: None, 637 TokenType.DCOLON: 
lambda self, this, to: self.expression( 638 exp.Cast if self.STRICT_CAST else exp.TryCast, 639 this=this, 640 to=to, 641 ), 642 TokenType.ARROW: lambda self, this, path: self.expression( 643 exp.JSONExtract, 644 this=this, 645 expression=self.dialect.to_json_path(path), 646 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 647 ), 648 TokenType.DARROW: lambda self, this, path: self.expression( 649 exp.JSONExtractScalar, 650 this=this, 651 expression=self.dialect.to_json_path(path), 652 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 653 ), 654 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 655 exp.JSONBExtract, 656 this=this, 657 expression=path, 658 ), 659 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 660 exp.JSONBExtractScalar, 661 this=this, 662 expression=path, 663 ), 664 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 665 exp.JSONBContains, 666 this=this, 667 expression=key, 668 ), 669 } 670 671 EXPRESSION_PARSERS = { 672 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 673 exp.Column: lambda self: self._parse_column(), 674 exp.Condition: lambda self: self._parse_assignment(), 675 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 676 exp.Expression: lambda self: self._parse_expression(), 677 exp.From: lambda self: self._parse_from(joins=True), 678 exp.Group: lambda self: self._parse_group(), 679 exp.Having: lambda self: self._parse_having(), 680 exp.Identifier: lambda self: self._parse_id_var(), 681 exp.Join: lambda self: self._parse_join(), 682 exp.Lambda: lambda self: self._parse_lambda(), 683 exp.Lateral: lambda self: self._parse_lateral(), 684 exp.Limit: lambda self: self._parse_limit(), 685 exp.Offset: lambda self: self._parse_offset(), 686 exp.Order: lambda self: self._parse_order(), 687 exp.Ordered: lambda self: self._parse_ordered(), 688 exp.Properties: lambda self: self._parse_properties(), 689 exp.Qualify: lambda self: self._parse_qualify(), 
690 exp.Returning: lambda self: self._parse_returning(), 691 exp.Select: lambda self: self._parse_select(), 692 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 693 exp.Table: lambda self: self._parse_table_parts(), 694 exp.TableAlias: lambda self: self._parse_table_alias(), 695 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 696 exp.Where: lambda self: self._parse_where(), 697 exp.Window: lambda self: self._parse_named_window(), 698 exp.With: lambda self: self._parse_with(), 699 "JOIN_TYPE": lambda self: self._parse_join_parts(), 700 } 701 702 STATEMENT_PARSERS = { 703 TokenType.ALTER: lambda self: self._parse_alter(), 704 TokenType.BEGIN: lambda self: self._parse_transaction(), 705 TokenType.CACHE: lambda self: self._parse_cache(), 706 TokenType.COMMENT: lambda self: self._parse_comment(), 707 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 708 TokenType.COPY: lambda self: self._parse_copy(), 709 TokenType.CREATE: lambda self: self._parse_create(), 710 TokenType.DELETE: lambda self: self._parse_delete(), 711 TokenType.DESC: lambda self: self._parse_describe(), 712 TokenType.DESCRIBE: lambda self: self._parse_describe(), 713 TokenType.DROP: lambda self: self._parse_drop(), 714 TokenType.INSERT: lambda self: self._parse_insert(), 715 TokenType.KILL: lambda self: self._parse_kill(), 716 TokenType.LOAD: lambda self: self._parse_load(), 717 TokenType.MERGE: lambda self: self._parse_merge(), 718 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 719 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 720 TokenType.REFRESH: lambda self: self._parse_refresh(), 721 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 722 TokenType.SET: lambda self: self._parse_set(), 723 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 724 TokenType.UNCACHE: lambda self: self._parse_uncache(), 725 TokenType.UPDATE: lambda self: self._parse_update(), 726 
TokenType.USE: lambda self: self.expression( 727 exp.Use, 728 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 729 this=self._parse_table(schema=False), 730 ), 731 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 732 } 733 734 UNARY_PARSERS = { 735 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 736 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 737 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 738 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 739 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 740 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 741 } 742 743 STRING_PARSERS = { 744 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 745 exp.RawString, this=token.text 746 ), 747 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 748 exp.National, this=token.text 749 ), 750 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 751 TokenType.STRING: lambda self, token: self.expression( 752 exp.Literal, this=token.text, is_string=True 753 ), 754 TokenType.UNICODE_STRING: lambda self, token: self.expression( 755 exp.UnicodeString, 756 this=token.text, 757 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 758 ), 759 } 760 761 NUMERIC_PARSERS = { 762 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 763 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 764 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 765 TokenType.NUMBER: lambda self, token: self.expression( 766 exp.Literal, this=token.text, is_string=False 767 ), 768 } 769 770 PRIMARY_PARSERS = { 771 **STRING_PARSERS, 772 **NUMERIC_PARSERS, 773 
TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 774 TokenType.NULL: lambda self, _: self.expression(exp.Null), 775 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 776 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 777 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 778 TokenType.STAR: lambda self, _: self.expression( 779 exp.Star, 780 **{ 781 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 782 "replace": self._parse_star_op("REPLACE"), 783 "rename": self._parse_star_op("RENAME"), 784 }, 785 ), 786 } 787 788 PLACEHOLDER_PARSERS = { 789 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 790 TokenType.PARAMETER: lambda self: self._parse_parameter(), 791 TokenType.COLON: lambda self: ( 792 self.expression(exp.Placeholder, this=self._prev.text) 793 if self._match_set(self.ID_VAR_TOKENS) 794 else None 795 ), 796 } 797 798 RANGE_PARSERS = { 799 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 800 TokenType.GLOB: binary_range_parser(exp.Glob), 801 TokenType.ILIKE: binary_range_parser(exp.ILike), 802 TokenType.IN: lambda self, this: self._parse_in(this), 803 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 804 TokenType.IS: lambda self, this: self._parse_is(this), 805 TokenType.LIKE: binary_range_parser(exp.Like), 806 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 807 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 808 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 809 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 810 } 811 812 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 813 "ALLOWED_VALUES": lambda self: self.expression( 814 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 815 ), 816 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 817 "AUTO": lambda self: self._parse_auto_property(), 818 "AUTO_INCREMENT": lambda 
self: self._parse_property_assignment(exp.AutoIncrementProperty), 819 "BACKUP": lambda self: self.expression( 820 exp.BackupProperty, this=self._parse_var(any_token=True) 821 ), 822 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 823 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 824 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 825 "CHECKSUM": lambda self: self._parse_checksum(), 826 "CLUSTER BY": lambda self: self._parse_cluster(), 827 "CLUSTERED": lambda self: self._parse_clustered_by(), 828 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 829 exp.CollateProperty, **kwargs 830 ), 831 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 832 "CONTAINS": lambda self: self._parse_contains_property(), 833 "COPY": lambda self: self._parse_copy_property(), 834 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 835 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 836 "DEFINER": lambda self: self._parse_definer(), 837 "DETERMINISTIC": lambda self: self.expression( 838 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 839 ), 840 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 841 "DISTKEY": lambda self: self._parse_distkey(), 842 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 843 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 844 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 845 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 846 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 847 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 848 "FREESPACE": lambda self: self._parse_freespace(), 849 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 850 "HEAP": lambda self: self.expression(exp.HeapProperty), 851 "ICEBERG": 
lambda self: self.expression(exp.IcebergProperty), 852 "IMMUTABLE": lambda self: self.expression( 853 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 854 ), 855 "INHERITS": lambda self: self.expression( 856 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 857 ), 858 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 859 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 860 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 861 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 862 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 863 "LIKE": lambda self: self._parse_create_like(), 864 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 865 "LOCK": lambda self: self._parse_locking(), 866 "LOCKING": lambda self: self._parse_locking(), 867 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 868 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 869 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 870 "MODIFIES": lambda self: self._parse_modifies_property(), 871 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 872 "NO": lambda self: self._parse_no_property(), 873 "ON": lambda self: self._parse_on_property(), 874 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 875 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 876 "PARTITION": lambda self: self._parse_partitioned_of(), 877 "PARTITION BY": lambda self: self._parse_partitioned_by(), 878 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 879 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 880 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 881 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 882 "READS": lambda self: self._parse_reads_property(), 883 
# Dispatch table for constraints: maps the constraint keyword text (possibly several
# words, e.g. "PRIMARY KEY") to a callable that takes the parser instance and returns
# the node built by the corresponding `_parse_*` helper or `self.expression(...)` call.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    # IDENTITY shares the auto-increment parser with AUTOINCREMENT / AUTO_INCREMENT
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    # A bare NULL is represented as a NotNullColumnConstraint flagged with allow_null=True
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    # ON UPDATE <function> yields an OnUpdateColumnConstraint; when UPDATE does not
    # follow, the `or` branch builds an OnProperty from the next identifier instead
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    # As a constraint, TTL wraps a single bitwise expression in a MergeTreeTTL node
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
}
# Maps an upper-case function name to a callable that takes the parser instance and
# delegates to that function's dedicated `_parse_*` method.
FUNCTION_PARSERS = {
    # CAST / CONVERT honour the class-level STRICT_CAST setting ...
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    # ... whereas the SAFE_ / TRY_ variants always pass strict=False together with safe=True
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
}
# Maps a keyword to a callable taking the parser instance. The three scope keywords
# forward their own name to `_parse_set_item_assignment`; TRANSACTION has a dedicated parser.
SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}
# Transaction characteristics in OPTIONS_TYPE form: each key is a leading keyword and
# each value lists the keywords / keyword sequences paired with it.
# NOTE(review): presumably consumed by the SET TRANSACTION parser — confirm at the call site.
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # Previously misspelled "UNCOMITTED", which left the standard
        # READ UNCOMMITTED isolation level out of the table
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}
# Token types grouped under the ADD_CONSTRAINT name.
# NOTE(review): presumably the tokens that mark an ALTER TABLE ... ADD clause as adding
# a constraint rather than a column — confirm against the ADD parsing code.
ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}
# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
# Names of the modifiers covered by the flag above.
# NOTE(review): presumably these are the arg keys moved onto the set operation — confirm at the use site.
SET_OP_MODIFIERS = {"order", "limit", "offset"}
def reset(self):
    """Clear all per-parse state: SQL text, recorded errors, token buffer and cursor."""
    self.sql, self.errors, self._tokens = "", [], []
    self._index = 0
    # No current/next/previous token, and no pending comments, until tokens are loaded
    self._curr = self._next = self._prev = self._prev_comments = None
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first expression type that parses
        successfully (one tree per statement).

    Raises:
        TypeError: If no parser is registered in EXPRESSION_PARSERS for a given type.
        ParseError: If none of the given types (or an empty collection) could be parsed;
            the individual failures are merged into its `errors`.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Tag the failure with the type that was being attempted; guard against a
            # ParseError that carries no structured error entries.
            if e.errors:
                e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # With no candidate types there is no prior failure to chain from; indexing
    # errors[-1] unconditionally would raise IndexError instead of ParseError.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from (errors[-1] if errors else None)
def check_errors(self) -> None:
    """Report the recorded errors: log each one under WARN, raise a combined one under RAISE."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    # Any other level, or an empty error list, needs no action
    if level != ErrorLevel.RAISE or not self.errors:
        return

    message = concat_messages(self.errors, self.max_errors)
    raise ParseError(message, errors=merge_errors(self.errors))
1426 kwargs: The arguments to set for the expression along with their respective values. 1427 1428 Returns: 1429 The target expression. 1430 """ 1431 instance = exp_class(**kwargs) 1432 instance.add_comments(comments) if comments else self._add_comments(instance) 1433 return self.validate_expression(instance) 1434 1435 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1436 if expression and self._prev_comments: 1437 expression.add_comments(self._prev_comments) 1438 self._prev_comments = None 1439 1440 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1441 """ 1442 Validates an Expression, making sure that all its mandatory arguments are set. 1443 1444 Args: 1445 expression: The expression to validate. 1446 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1447 1448 Returns: 1449 The validated expression. 1450 """ 1451 if self.error_level != ErrorLevel.IGNORE: 1452 for error_message in expression.error_messages(args): 1453 self.raise_error(error_message) 1454 1455 return expression 1456 1457 def _find_sql(self, start: Token, end: Token) -> str: 1458 return self.sql[start.start : end.end + 1] 1459 1460 def _is_connected(self) -> bool: 1461 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1462 1463 def _advance(self, times: int = 1) -> None: 1464 self._index += times 1465 self._curr = seq_get(self._tokens, self._index) 1466 self._next = seq_get(self._tokens, self._index + 1) 1467 1468 if self._index > 0: 1469 self._prev = self._tokens[self._index - 1] 1470 self._prev_comments = self._prev.comments 1471 else: 1472 self._prev = None 1473 self._prev_comments = None 1474 1475 def _retreat(self, index: int) -> None: 1476 if index != self._index: 1477 self._advance(index - self._index) 1478 1479 def _warn_unsupported(self) -> None: 1480 if len(self._tokens) <= 1: 1481 return 1482 1483 # We use _find_sql because self.sql may comprise multiple chunks, 
def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
    """
    Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly.

    Args:
        parse_method: The zero-argument parse function to attempt.
        retreat: If True, the token cursor is rewound even when parsing succeeds.

    Returns:
        The result of `parse_method`, or None if it raised a ParseError.
    """
    index = self._index
    error_level = self.error_level

    # Bound before the `try` so the `finally` clause can always read it. Without this,
    # a non-ParseError raised by `parse_method` left `this` unassigned and the cleanup
    # itself failed with UnboundLocalError, masking the original exception.
    this = None

    # Force errors to surface as exceptions so that a failed attempt can be caught here
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this
# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """Parse a comma-separated list of TTL entries plus optional WHERE / GROUP BY ... SET parts."""

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        # Each entry is an expression optionally followed by exactly one action keyword
        target = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            action = {"delete": True}
        elif self._match_text_seq("RECOMPRESS"):
            action = {"recompress": self._parse_bitwise()}
        elif self._match_text_seq("TO", "DISK"):
            action = {"to_disk": self._parse_string()}
        elif self._match_text_seq("TO", "VOLUME"):
            action = {"to_volume": self._parse_string()}
        else:
            # No action keyword: the bare expression is the entry
            return target

        return self.expression(exp.MergeTreeTTLAction, this=target, **action)

    ttl_entries = self._parse_csv(_parse_ttl_action)
    where_clause = self._parse_where()
    group_clause = self._parse_group()

    # SET aggregates are only looked for when a GROUP BY was parsed
    set_aggregates = (
        self._parse_csv(self._parse_set_item)
        if group_clause and self._match(TokenType.SET)
        else None
    )

    return self.expression(
        exp.MergeTreeTTL,
        expressions=ttl_entries,
        where=where_clause,
        group=group_clause,
        aggregates=set_aggregates,
    )
def _parse_create(self) -> exp.Create | exp.Command:
    """
    Parses the remainder of a CREATE-style statement whose leading token was already consumed.

    The leading token is read from `self._prev`; if it is REPLACE, or if OR REPLACE /
    OR ALTER follows, the statement is flagged as a replacement.

    Returns:
        An exp.Create node, or the result of `_parse_as_command` when no creatable kind
        can be identified or when tokens other than ')' or ',' are left unconsumed.
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev
    comments = self._prev_comments

    replace = (
        start.token_type == TokenType.REPLACE
        or self._match_pair(TokenType.OR, TokenType.REPLACE)
        or self._match_pair(TokenType.OR, TokenType.ALTER)
    )

    unique = self._match(TokenType.UNIQUE)

    # For "TABLE FUNCTION", skip TABLE so that FUNCTION is matched as the creatable kind
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    exists = self._parse_exists(not_=True)
    this = None
    expression: t.Optional[exp.Expression] = None
    indexes = None
    no_schema_binding = None
    begin = None
    end = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        # Merge newly parsed properties into the enclosing `properties` accumulator
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
        extend_props(self._parse_properties())

        if not expression:
            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")

                if self._match(TokenType.STRING, advance=False):
                    # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                    expression = self._parse_string()
                    extend_props(self._parse_properties())
                else:
                    expression = self._parse_statement()

                end = self._match_text_seq("END")

                if return_:
                    expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
        if not self._match(TokenType.ON):
            index = self._parse_id_var()
            anonymous = False
        else:
            index = None
            anonymous = True

        this = self._parse_index(index=index, anonymous=anonymous)
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(
            schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
        )

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        if create_token.token_type == TokenType.SEQUENCE:
            expression = self._parse_types()
            extend_props(self._parse_properties())
        else:
            expression = self._parse_ddl_select()

        if create_token.token_type == TokenType.TABLE:
            # exp.Properties.Location.POST_EXPRESSION
            extend_props(self._parse_properties())

            # Collect trailing index definitions until none can be parsed
            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())

                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

    # Leftover tokens (other than a closing paren or comma) mean the statement was not
    # fully understood, so the whole thing is re-parsed as a Command from `start`
    if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
        return self._parse_as_command(start)

    return self.expression(
        exp.Create,
        comments=comments,
        this=this,
        kind=create_token.text.upper(),
        replace=replace,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        end=end,
        clone=clone,
    )
def _parse_property_before(self) -> t.Optional[exp.Expression]:
    """Parse one property that may be preceded by modifier keywords (NO, DUAL, DEFAULT, ...).

    The matched modifiers are passed to the property's parser as keyword arguments; a
    parser that rejects them (TypeError) results in a reported parse error.
    """
    # only used for teradata currently
    self._match(TokenType.COMMA)

    # Modifiers are probed strictly in this order; only those that matched are kept
    modifiers = {}

    for name, keyword in (
        ("no", "NO"),
        ("dual", "DUAL"),
        ("before", "BEFORE"),
        ("default", "DEFAULT"),
    ):
        if self._match_text_seq(keyword):
            modifiers[name] = True

    if self._match_text_seq("LOCAL"):
        modifiers["local"] = "LOCAL"
    elif self._match_text_seq("NOT", "LOCAL"):
        modifiers["local"] = "NOT LOCAL"

    if self._match_text_seq("AFTER"):
        modifiers["after"] = True

    for name, spellings in (("minimum", ("MIN", "MINIMUM")), ("maximum", ("MAX", "MAXIMUM"))):
        matched = self._match_texts(spellings)
        if matched:
            modifiers[name] = matched

    if not self._match_texts(self.PROPERTY_PARSERS):
        return None

    property_parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
    try:
        return property_parser(self, **modifiers)
    except TypeError:
        self.raise_error(f"Cannot parse property '{self._prev.text}'")

    return None
def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """Collect consecutive properties into a single ``exp.Properties`` node.

    Args:
        before: When truthy, each property is read with
            ``_parse_property_before``; otherwise ``_parse_property`` is used.

    Returns:
        An ``exp.Properties`` wrapping every property that was parsed, or None
        when the first attempt already yields nothing.
    """
    parse_one = self._parse_property_before if before else self._parse_property

    collected = []
    while True:
        parsed = parse_one()
        if not parsed:
            break

        # ensure_list lets a single parse contribute one property or several
        collected.extend(ensure_list(parsed))

    if not collected:
        return None

    return self.expression(exp.Properties, expressions=collected)
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse ``SYSTEM_VERSIONING [=] {OFF | ON [(<option> [, ...])]}``.

    Recognised options inside the parentheses are ``HISTORY_TABLE``,
    ``DATA_CONSISTENCY_CHECK`` and ``HISTORY_RETENTION_PERIOD``, each followed
    by ``=`` and a value.

    Args:
        with_: Stored verbatim under the ``with`` arg of the resulting node.

    Returns:
        The populated ``exp.WithSystemVersioningProperty``. ``on`` is True
        unless the ``OFF`` keyword was matched.
    """
    self._match(TokenType.EQ)
    prop = self.expression(
        exp.WithSystemVersioningProperty,
        **{  # type: ignore
            "on": True,
            "with": with_,
        },
    )

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            start = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == start:
                # Nothing was consumed in this iteration (unknown option or
                # stray token), so looping again would spin forever on the same
                # token. Report it and stop instead.
                self.raise_error("Unexpected token in SYSTEM_VERSIONING options")
                break

    return prop
def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
    """Parse what follows a ``WITH`` keyword in a property position.

    The alternatives are tried in a fixed order: ``(SYSTEM_VERSIONING ...)``,
    a parenthesised property list, ``JOURNAL``, a view attribute, ``[NO] DATA``,
    serde properties, ``SCHEMA <binding option>`` and finally isolated loading.

    Returns:
        A single property, a list of properties (parenthesised form), or None
        when nothing matched.
    """
    if self._match_text_seq("(", "SYSTEM_VERSIONING"):
        versioning = self._parse_system_versioning_property(with_=True)
        self._match_r_paren()
        return versioning

    # The parenthesis is only peeked at; the wrapped parser consumes it
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_properties()

    if self._match_text_seq("JOURNAL"):
        return self._parse_withjournaltable()

    if self._match_texts(self.VIEW_ATTRIBUTES):
        attribute = self._prev.text.upper()
        return self.expression(exp.ViewAttributeProperty, this=attribute)

    if self._match_text_seq("DATA"):
        return self._parse_withdata(no=False)

    if self._match_text_seq("NO", "DATA"):
        return self._parse_withdata(no=True)

    # Peek only: _parse_serde_properties matches the token itself
    if self._match(TokenType.SERDE_PROPERTIES, advance=False):
        return self._parse_serde_properties(with_=True)

    if self._match(TokenType.SCHEMA):
        binding = self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS)
        return self.expression(exp.WithSchemaBindingProperty, this=binding)

    return self._parse_withisolatedloading() if self._next else None
def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """Parse ``[BY] (<columns>) [SORTED BY (<ordered>)] [INTO] <n> [BUCKETS]``.

    Returns:
        An ``exp.ClusteredByProperty`` holding the clustering columns, the
        optional sort specification (None when absent) and the bucket count.
    """
    self._match_text_seq("BY")

    self._match_l_paren()
    cluster_columns = self._parse_csv(self._parse_column)
    self._match_r_paren()

    sort_spec = None
    if self._match_text_seq("SORTED", "BY"):
        self._match_l_paren()
        sort_spec = self._parse_csv(self._parse_ordered)
        self._match_r_paren()

    self._match(TokenType.INTO)
    bucket_count = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=cluster_columns,
        sorted_by=sort_spec,
        buckets=bucket_count,
    )
def _parse_datablocksize(
    self,
    default: t.Optional[bool] = None,
    minimum: t.Optional[bool] = None,
    maximum: t.Optional[bool] = None,
) -> exp.DataBlocksizeProperty:
    """Parse ``[=] <number> [BYTES | KBYTES | KILOBYTES]``.

    Args:
        default: Forwarded unchanged to the resulting node.
        minimum: Forwarded unchanged to the resulting node.
        maximum: Forwarded unchanged to the resulting node.

    Returns:
        An ``exp.DataBlocksizeProperty`` with the parsed size and, when a unit
        keyword was present, its text as written in the input.
    """
    self._match(TokenType.EQ)
    block_size = self._parse_number()

    # The unit keeps the token's own text; it is not upper-cased
    unit_text = (
        self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None
    )

    return self.expression(
        exp.DataBlocksizeProperty,
        size=block_size,
        units=unit_text,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )
def _parse_locking(self) -> exp.LockingProperty:
    """Parse a locking clause: object kind, optional name, FOR/IN, lock type, OVERRIDE.

    Every part is optional; parts that are absent are stored as None (or as
    the falsy result of the OVERRIDE match).

    Returns:
        An ``exp.LockingProperty`` describing the clause.
    """
    # Object kind: token-based keywords first, then the DATABASE text keyword
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # Only these kinds are followed by an object name
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are stored as EXCLUSIVE
        lock_type = "EXCLUSIVE"
    else:
        for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(keyword):
                lock_type = keyword
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
    """Parse ``OF <parent table> {DEFAULT | FOR VALUES <bound spec>}``.

    Returns:
        An ``exp.PartitionedOfProperty``, or None when ``OF`` does not follow,
        in which case the parser is moved back by one token first.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bound up front so the final call below never sees an unbound local when
    # neither clause is present and raise_error returns instead of raising.
    expression: t.Optional[exp.Var | exp.PartitionBoundSpec] = None

    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)
def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """Parse the optional ``AND [NO] STATISTICS`` suffix of ``WITH [NO] DATA``.

    Args:
        no: Forwarded unchanged to the resulting node.

    Returns:
        An ``exp.WithDataProperty`` whose ``statistics`` is True for
        ``AND STATISTICS``, False for ``AND NO STATISTICS`` and None otherwise.
    """
    with_statistics = None
    if self._match_text_seq("AND", "STATISTICS"):
        with_statistics = True
    elif self._match_text_seq("AND", "NO", "STATISTICS"):
        with_statistics = False

    return self.expression(exp.WithDataProperty, no=no, statistics=with_statistics)
def _parse_describe(self) -> exp.Describe:
    """Parse the body of a DESCRIBE statement.

    Reads an optional creatable kind, an optional style keyword
    (EXTENDED, FORMATTED or HISTORY), the target table and any trailing
    properties.

    Returns:
        An ``exp.Describe`` node; ``expressions`` is None when no properties
        follow the table.
    """
    kind = self._match_set(self.CREATABLES)
    if kind:
        kind = self._prev.text

    style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY"))
    if style:
        style = self._prev.text.upper()

    if self._match(TokenType.DOT):
        # Drop the style and step back over the dot and the token before it;
        # presumably that token is the first part of a dotted table name.
        style = None
        self._retreat(self._index - 2)

    target = self._parse_table(schema=True)
    properties = self._parse_properties()
    property_list = properties.expressions if properties else None

    return self.expression(
        exp.Describe, this=target, style=style, kind=kind, expressions=property_list
    )
def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
    """Parse an ``ON CONFLICT ...`` or ``ON DUPLICATE KEY ...`` clause.

    Returns:
        An ``exp.OnConflict`` node, or None when neither introducer matches.
    """
    # Both introducers are always attempted, in this order
    is_conflict = self._match_text_seq("ON", "CONFLICT")
    is_duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not (is_conflict or is_duplicate):
        return None

    key_columns = None
    named_constraint = None

    if is_conflict:
        if self._match_text_seq("ON", "CONSTRAINT"):
            named_constraint = self._parse_id_var()
        elif self._match(TokenType.L_PAREN):
            key_columns = self._parse_csv(self._parse_id_var)
            self._match_r_paren()

    action = self._parse_var_from_options(self.CONFLICT_ACTIONS)

    # Assignments are only read when the last consumed token was UPDATE
    assignments = None
    if self._prev.token_type == TokenType.UPDATE:
        self._match(TokenType.SET)
        assignments = self._parse_csv(self._parse_equality)

    return self.expression(
        exp.OnConflict,
        duplicate=is_duplicate,
        expressions=assignments,
        action=action,
        conflict_keys=key_columns,
        constraint=named_constraint,
    )
def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parse a ``SERDE`` or ``DELIMITED`` row format specification.

    Args:
        match_row: When True, the ``ROW FORMAT`` token pair must be matched
            first; if it is not, nothing is parsed.

    Returns:
        An ``exp.RowFormatSerdeProperty`` for the SERDE form, otherwise an
        ``exp.RowFormatDelimitedProperty`` carrying only the clauses that were
        present, or None when ``match_row`` is set and ``ROW FORMAT`` is absent.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde_class = self._parse_string()
        serde_properties = self._parse_serde_properties()
        return self.expression(
            exp.RowFormatSerdeProperty, this=serde_class, serde_properties=serde_properties
        )

    self._match_text_seq("DELIMITED")

    # Each clause is tried once, in this order; its value is a string
    delimited_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    kwargs = {}
    for arg_name, keywords in delimited_clauses:
        if self._match_text_seq(*keywords):
            kwargs[arg_name] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
def _parse_delete(self) -> exp.Delete:
    """Parse the body of a DELETE statement.

    Returns:
        An ``exp.Delete`` node built from the clauses found, which are read in
        the order: leading table list, RETURNING, FROM, USING, WHERE,
        RETURNING (only if not already found), LIMIT.
    """
    comments = self._prev_comments

    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    tables = None
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    returning = self._parse_returning()

    # The clauses below are parsed into locals in the same order the keyword
    # arguments were evaluated before, since each step consumes tokens.
    target = self._match(TokenType.FROM) and self._parse_table(joins=True)
    using = self._match(TokenType.USING) and self._parse_table(joins=True)
    where = self._parse_where()
    returning = returning or self._parse_returning()
    limit = self._parse_limit()

    return self.expression(
        exp.Delete,
        comments=comments,
        tables=tables,
        this=target,
        using=using,
        where=where,
        returning=returning,
        limit=limit,
    )
def _parse_cache(self) -> exp.Cache:
    """Parse ``[LAZY] [TABLE] <table> [OPTIONS (<key> [=] <value>)] [AS] <query>``.

    Returns:
        An ``exp.Cache`` node. ``options`` is ``[key, value]`` when an OPTIONS
        clause is present and an empty list otherwise.
    """
    lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    cached_table = self._parse_table(schema=True)

    options = []
    if self._match_text_seq("OPTIONS"):
        self._match_l_paren()
        option_key = self._parse_string()
        self._match(TokenType.EQ)
        option_value = self._parse_string()
        self._match_r_paren()
        options = [option_key, option_value]

    self._match(TokenType.ALIAS)
    query = self._parse_select(nested=True)

    return self.expression(
        exp.Cache,
        this=cached_table,
        lazy=lazy,
        options=options,
        expression=query,
    )
def _parse_projections(self) -> t.List[exp.Expression]:
    """Parse the projection list of a SELECT.

    This delegates entirely to ``_parse_expressions``.

    Returns:
        Whatever ``_parse_expressions`` returns.
    """
    projections = self._parse_expressions()
    return projections
def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
    """Parse a WITH clause and its CTEs.

    Args:
        skip_with_token: When True, the leading WITH token is not matched
            here and parsing starts directly at the optional RECURSIVE.

    Returns:
        An ``exp.With`` holding at least one CTE, or None when the WITH token
        is required but absent.
    """
    if not (skip_with_token or self._match(TokenType.WITH)):
        return None

    comments = self._prev_comments
    recursive = self._match(TokenType.RECURSIVE)

    ctes = [self._parse_cte()]

    # CTEs may be separated by a comma, by a repeated WITH, or by a comma
    # followed by WITH.
    while self._match(TokenType.COMMA) or self._match(TokenType.WITH):
        self._match(TokenType.WITH)
        ctes.append(self._parse_cte())

    return self.expression(
        exp.With, comments=comments, expressions=ctes, recursive=recursive
    )
def _parse_table_alias(
    self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.TableAlias]:
    """Parse ``[AS] <alias> [(<column>, ...)]``.

    Args:
        alias_tokens: Token types accepted as the alias name; falls back to
            ``TABLE_ALIAS_TOKENS`` when omitted or empty.

    Returns:
        An ``exp.TableAlias``, or None when neither an alias nor a column
        list was found.
    """
    # When the alias keyword is present, any token is accepted as the name
    saw_alias_keyword = self._match(TokenType.ALIAS)
    accepted_tokens = alias_tokens or self.TABLE_ALIAS_TOKENS

    alias = self._parse_id_var(any_token=saw_alias_keyword, tokens=accepted_tokens)
    if not alias:
        alias = self._parse_string_as_identifier()

    columns = None
    checkpoint = self._index
    if self._match(TokenType.L_PAREN):
        columns = self._parse_csv(self._parse_function_parameter)
        if columns:
            self._match_r_paren()
        else:
            # Nothing parsed as a column list: give the parenthesis back
            self._retreat(checkpoint)

    if not (alias or columns):
        return None

    table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

    # We bubble up comments from the Identifier to the TableAlias
    if isinstance(alias, exp.Identifier):
        table_alias.add_comments(alias.pop_comments())

    return table_alias
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach trailing joins, laterals and clause modifiers to `this`.

    Modifiers are only parsed when `this` is an `exp.Query` or `exp.Table`;
    any other value (including None) skips straight to the implicit-unnest
    step below.

    Args:
        this: the expression parsed so far.

    Returns:
        `this` with the parsed modifiers set on it, possibly rewritten by
        `_implicit_unnests_to_explicit`.
    """
    if isinstance(this, (exp.Query, exp.Table)):
        for join in self._parse_joins():
            this.append("joins", join)
        # Two-argument iter(): keep calling _parse_lateral until it returns None.
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            # Peek (advance=False) so the selected parser consumes its own token.
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                key, expression = parser(self)

                if expression:
                    this.set(key, expression)
                    if key == "limit":
                        # An offset stored on the limit node is split out into
                        # its own Offset node on the parent.
                        offset = expression.args.pop("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            # The limit's `expressions` list is moved over to
                            # the new Offset node.
                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                    continue
            # Reached when no modifier token is next, or when the selected
            # parser produced nothing.
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
        this = self._implicit_unnests_to_explicit(this)

    return this
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE (...) clause into an `exp.MatchRecognize`.

    The sub-clauses are tried in a fixed order: partition, order, MEASURES,
    rows-per-match, AFTER MATCH SKIP, PATTERN, DEFINE. Each one that is absent
    is stored as None. A table alias is parsed after the closing parenthesis.

    Returns:
        The `exp.MatchRecognize` node, or None when the current token is not
        MATCH_RECOGNIZE.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # The rows-per-match option is kept as a plain Var holding the keyword text.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # The skip strategy is likewise kept as keyword text in a Var.
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST/LAST is appended verbatim.
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern body is not parsed into a tree: tokens are skipped while
        # tracking parenthesis depth, starting at 1 for the one just matched.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` trails the cursor by one token, so once the closing
            # parenthesis is consumed it points at the token just before it.
            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        # NOTE(review): _find_sql presumably returns the raw SQL text spanning
        # the start..end tokens - confirm against its definition.
        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
self._match_pair(TokenType.OUTER, TokenType.APPLY): 3084 cross_apply = False 3085 3086 if cross_apply is not None: 3087 this = self._parse_select(table=True) 3088 view = None 3089 outer = None 3090 elif self._match(TokenType.LATERAL): 3091 this = self._parse_select(table=True) 3092 view = self._match(TokenType.VIEW) 3093 outer = self._match(TokenType.OUTER) 3094 else: 3095 return None 3096 3097 if not this: 3098 this = ( 3099 self._parse_unnest() 3100 or self._parse_function() 3101 or self._parse_id_var(any_token=False) 3102 ) 3103 3104 while self._match(TokenType.DOT): 3105 this = exp.Dot( 3106 this=this, 3107 expression=self._parse_function() or self._parse_id_var(any_token=False), 3108 ) 3109 3110 if view: 3111 table = self._parse_id_var(any_token=False) 3112 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3113 table_alias: t.Optional[exp.TableAlias] = self.expression( 3114 exp.TableAlias, this=table, columns=columns 3115 ) 3116 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3117 # We move the alias from the lateral's child node to the lateral itself 3118 table_alias = this.args["alias"].pop() 3119 else: 3120 table_alias = self._parse_table_alias() 3121 3122 return self.expression( 3123 exp.Lateral, 3124 this=this, 3125 view=view, 3126 outer=outer, 3127 alias=table_alias, 3128 cross_apply=cross_apply, 3129 ) 3130 3131 def _parse_join_parts( 3132 self, 3133 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3134 return ( 3135 self._match_set(self.JOIN_METHODS) and self._prev, 3136 self._match_set(self.JOIN_SIDES) and self._prev, 3137 self._match_set(self.JOIN_KINDS) and self._prev, 3138 ) 3139 3140 def _parse_join( 3141 self, skip_join_token: bool = False, parse_bracket: bool = False 3142 ) -> t.Optional[exp.Join]: 3143 if self._match(TokenType.COMMA): 3144 return self.expression(exp.Join, this=self._parse_table()) 3145 3146 index = self._index 3147 method, side, kind = 
self._parse_join_parts() 3148 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3149 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3150 3151 if not skip_join_token and not join: 3152 self._retreat(index) 3153 kind = None 3154 method = None 3155 side = None 3156 3157 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3158 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3159 3160 if not skip_join_token and not join and not outer_apply and not cross_apply: 3161 return None 3162 3163 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3164 3165 if method: 3166 kwargs["method"] = method.text 3167 if side: 3168 kwargs["side"] = side.text 3169 if kind: 3170 kwargs["kind"] = kind.text 3171 if hint: 3172 kwargs["hint"] = hint 3173 3174 if self._match(TokenType.MATCH_CONDITION): 3175 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3176 3177 if self._match(TokenType.ON): 3178 kwargs["on"] = self._parse_assignment() 3179 elif self._match(TokenType.USING): 3180 kwargs["using"] = self._parse_wrapped_id_vars() 3181 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3182 kind and kind.token_type == TokenType.CROSS 3183 ): 3184 index = self._index 3185 joins: t.Optional[list] = list(self._parse_joins()) 3186 3187 if joins and self._match(TokenType.ON): 3188 kwargs["on"] = self._parse_assignment() 3189 elif joins and self._match(TokenType.USING): 3190 kwargs["using"] = self._parse_wrapped_id_vars() 3191 else: 3192 joins = None 3193 self._retreat(index) 3194 3195 kwargs["this"].set("joins", joins if joins else None) 3196 3197 comments = [c for token in (method, side, kind) if token for c in token.comments] 3198 return self.expression(exp.Join, comments=comments, **kwargs) 3199 3200 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3201 this = self._parse_assignment() 3202 3203 if 
self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3204 return this 3205 3206 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3207 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3208 3209 return this 3210 3211 def _parse_index_params(self) -> exp.IndexParameters: 3212 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3213 3214 if self._match(TokenType.L_PAREN, advance=False): 3215 columns = self._parse_wrapped_csv(self._parse_with_operator) 3216 else: 3217 columns = None 3218 3219 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3220 partition_by = self._parse_partition_by() 3221 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3222 tablespace = ( 3223 self._parse_var(any_token=True) 3224 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3225 else None 3226 ) 3227 where = self._parse_where() 3228 3229 on = self._parse_field() if self._match(TokenType.ON) else None 3230 3231 return self.expression( 3232 exp.IndexParameters, 3233 using=using, 3234 columns=columns, 3235 include=include, 3236 partition_by=partition_by, 3237 where=where, 3238 with_storage=with_storage, 3239 tablespace=tablespace, 3240 on=on, 3241 ) 3242 3243 def _parse_index( 3244 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3245 ) -> t.Optional[exp.Index]: 3246 if index or anonymous: 3247 unique = None 3248 primary = None 3249 amp = None 3250 3251 self._match(TokenType.ON) 3252 self._match(TokenType.TABLE) # hive 3253 table = self._parse_table_parts(schema=True) 3254 else: 3255 unique = self._match(TokenType.UNIQUE) 3256 primary = self._match_text_seq("PRIMARY") 3257 amp = self._match_text_seq("AMP") 3258 3259 if not self._match(TokenType.INDEX): 3260 return None 3261 3262 index = self._parse_id_var() 3263 table = None 3264 3265 params = self._parse_index_params() 3266 3267 return self.expression( 3268 
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a dot-separated table reference into an `exp.Table`.

    Args:
        schema: forwarded to `_parse_table_part` for every part.
        is_db_reference: when true the parsed parts are shifted one level up,
            so the last part becomes `db` and `this` is None.
        wildcard: when true, a `*` directly connected to the last part is
            folded into the table name.

    Returns:
        An `exp.Table` carrying `this`, `db`, `catalog` and any pivots parsed
        after the name.

    Raises:
        Goes through `self.raise_error` when the required table name (or
        database name, for `is_db_reference`) is missing.
    """
    catalog = None
    db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            # Shift the parts parsed so far one level up: table -> db -> catalog.
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        if isinstance(table, exp.Identifier):
            # Append the star to the existing identifier's name in place.
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    return self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
        pivots=self._parse_pivots(),
    )
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a snapshot / time-travel clause into an `exp.Version`.

    Returns:
        None when the current token is neither TIMESTAMP_SNAPSHOT nor
        VERSION_SNAPSHOT. Otherwise an `exp.Version` whose `this` is
        "TIMESTAMP" or "VERSION", whose `kind` is the matched qualifier
        (the upper-cased FROM/BETWEEN text, "CONTAINED IN", "ALL" or "AS OF"),
        and whose `expression` is the operand for that qualifier.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        snapshot_kind = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        snapshot_kind = "VERSION"
    else:
        return None

    # Stays None for the ALL qualifier, which takes no operand.
    operand: t.Optional[exp.Expression] = None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        qualifier = self._prev.text.upper()
        lower = self._parse_bitwise()
        # The range separator (TO or AND) is optional.
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        operand = self.expression(exp.Tuple, expressions=[lower, upper])
    elif self._match_text_seq("CONTAINED", "IN"):
        qualifier = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        operand = self.expression(exp.Tuple, expressions=members)
    elif self._match(TokenType.ALL):
        qualifier = "ALL"
    else:
        # "AS OF" is the fallback qualifier; the keywords themselves are optional.
        self._match_text_seq("AS", "OF")
        qualifier = "AS OF"
        operand = self._parse_type()

    return self.expression(
        exp.Version, this=snapshot_kind, expression=operand, kind=qualifier
    )
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an `exp.TableSample`.

    Args:
        as_modifier: when true, the text sequence `USING SAMPLE` is accepted
            as an alternative to the TABLE_SAMPLE token.

    Returns:
        The `exp.TableSample` node, or None when no sampling clause starts at
        the current token.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    # An optional sampling method may precede the (optionally parenthesized) size.
    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        # BUCKET <numerator> [OUT OF] <denominator> [ON] <field>
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        size = num
    else:
        # No unit given and the dialect treats a bare size as a percentage.
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    if self._match(TokenType.L_PAREN):
        # Trailing "(method [, seed])" form; this overrides any method parsed above.
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a PIVOT or UNPIVOT clause into an `exp.Pivot`.

    Returns:
        The `exp.Pivot` node, or None when the current token is neither PIVOT
        nor UNPIVOT, or when the keyword is not followed by an opening
        parenthesis (in which case the parser position is restored).

    Raises:
        Goes through `self.raise_error` when the aggregation/column list is
        empty or the FOR keyword is missing.
    """
    # Remember where we started so the keyword can be un-consumed below.
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    expressions = []

    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        # PIVOT takes a list of (optionally aliased) function calls.
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    field = self._parse_pivot_in()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        field=field,
        unpivot=unpivot,
        include_nulls=include_nulls,
    )

    # No alias is parsed when another PIVOT/UNPIVOT follows immediately.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        # Build one output column identifier per (IN-list value, aggregation
        # name) pair; the dialect flags below control how the two are combined.
        columns: t.List[exp.Expression] = []
        for fld in pivot.args["field"].expressions:
            field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
            for name in names:
                # An empty aggregation name yields just the field name.
                if self.PREFIXED_PIVOT_COLUMNS:
                    name = f"{name}_{field_name}" if name else field_name
                else:
                    name = f"{field_name}_{name}" if name else field_name

                columns.append(exp.to_identifier(name))

        pivot.set("columns", columns)

    return pivot
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse a hierarchical-query clause (START WITH / CONNECT BY) into `exp.Connect`.

    START WITH may appear either before or after CONNECT BY.

    Args:
        skip_start_token: when true, no leading START WITH is looked for and
            parsing begins at CONNECT BY.

    Returns:
        The `exp.Connect` node, or None when `skip_start_token` is false and
        the current token is not START_WITH.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_assignment()
    else:
        return None

    self._match(TokenType.CONNECT_BY)
    nocycle = self._match_text_seq("NOCYCLE")

    # PRIOR is only registered as a no-paren function while the CONNECT BY
    # condition is being parsed. The previous state of the registry is restored
    # in a `finally` so that a parse error cannot leave the temporary entry
    # behind, a nested CONNECT BY does not strip the entry the outer clause
    # still needs, and a pre-existing "PRIOR" entry is not lost.
    # NOTE(review): NO_PAREN_FUNCTION_PARSERS looks like a class-level mapping
    # shared between parser instances - confirm against the class definition.
    parsers = self.NO_PAREN_FUNCTION_PARSERS
    had_prior = "PRIOR" in parsers
    previous_prior = parsers.get("PRIOR")

    parsers["PRIOR"] = lambda self: self.expression(
        exp.Prior, this=self._parse_bitwise()
    )
    try:
        connect = self._parse_assignment()
    finally:
        if had_prior:
            parsers["PRIOR"] = previous_prior
        else:
            parsers.pop("PRIOR", None)

    if not start and self._match(TokenType.START_WITH):
        start = self._parse_assignment()

    return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
if ( 3880 not explicitly_null_ordered 3881 and ( 3882 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3883 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3884 ) 3885 and self.dialect.NULL_ORDERING != "nulls_are_last" 3886 ): 3887 nulls_first = True 3888 3889 if self._match_text_seq("WITH", "FILL"): 3890 with_fill = self.expression( 3891 exp.WithFill, 3892 **{ # type: ignore 3893 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3894 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3895 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3896 }, 3897 ) 3898 else: 3899 with_fill = None 3900 3901 return self.expression( 3902 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3903 ) 3904 3905 def _parse_limit( 3906 self, 3907 this: t.Optional[exp.Expression] = None, 3908 top: bool = False, 3909 skip_limit_token: bool = False, 3910 ) -> t.Optional[exp.Expression]: 3911 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3912 comments = self._prev_comments 3913 if top: 3914 limit_paren = self._match(TokenType.L_PAREN) 3915 expression = self._parse_term() if limit_paren else self._parse_number() 3916 3917 if limit_paren: 3918 self._match_r_paren() 3919 else: 3920 expression = self._parse_term() 3921 3922 if self._match(TokenType.COMMA): 3923 offset = expression 3924 expression = self._parse_term() 3925 else: 3926 offset = None 3927 3928 limit_exp = self.expression( 3929 exp.Limit, 3930 this=this, 3931 expression=expression, 3932 offset=offset, 3933 comments=comments, 3934 expressions=self._parse_limit_by(), 3935 ) 3936 3937 return limit_exp 3938 3939 if self._match(TokenType.FETCH): 3940 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3941 direction = self._prev.text.upper() if direction else "FIRST" 3942 3943 count = self._parse_field(tokens=self.FETCH_TOKENS) 3944 percent = self._match(TokenType.PERCENT) 3945 3946 
def _parse_locks(self) -> t.List[exp.Lock]:
    """Parse consecutive locking clauses into a list of `exp.Lock` nodes.

    Recognized introducers are FOR UPDATE, FOR SHARE and LOCK IN SHARE MODE, each
    optionally followed by `OF <tables>` and one of NOWAIT, `WAIT <primary>` or
    SKIP LOCKED. Parsing stops at the first position where no introducer matches.

    Returns:
        The parsed locks in source order (empty if none were found).
    """
    parsed_locks = []

    while True:
        if self._match_text_seq("FOR", "UPDATE"):
            is_update = True
        elif self._match_text_seq("FOR", "SHARE"):
            is_update = False
        elif self._match_text_seq("LOCK", "IN", "SHARE", "MODE"):
            is_update = False
        else:
            return parsed_locks

        locked_tables = (
            self._parse_csv(lambda: self._parse_table(schema=True))
            if self._match_text_seq("OF")
            else None
        )

        # wait is True for NOWAIT, an expression for WAIT <n>, False for SKIP LOCKED
        wait: t.Optional[bool | exp.Expression]
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False
        else:
            wait = None

        lock = self.expression(exp.Lock, update=is_update, expressions=locked_tables, wait=wait)
        parsed_locks.append(lock)
def _parse_assignment(self) -> t.Optional[exp.Expression]:
    """Parse a disjunction optionally followed by assignment operators.

    The right-hand side is parsed recursively, so chained assignments nest to the
    right: `a op b op c` becomes `op(a, op(b, c))`.

    Returns:
        The parsed expression, or whatever `_parse_disjunction` returned when no
        assignment operator follows it.
    """
    lhs = self._parse_disjunction()

    while self._match_set(self.ASSIGNMENT):
        # Capture everything tied to the operator token before recursing, since the
        # recursive call moves the parser past it
        node_type = self.ASSIGNMENT[self._prev.token_type]
        operator_comments = self._prev_comments
        rhs = self._parse_assignment()
        lhs = self.expression(node_type, this=lhs, comments=operator_comments, expression=rhs)

    return lhs
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an already-consumed IS keyword.

    Handles `IS [NOT] DISTINCT FROM <expr>` and `IS [NOT] <null | boolean>`.

    Args:
        this: The left-hand operand of the IS predicate.

    Returns:
        The resulting predicate, or None after retreating to just before the IS
        token when neither form matches.
    """
    # The caller consumed IS, so the token to retreat to sits one position back
    is_token_index = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM is null-safe equality, IS DISTINCT FROM its negation
        if negated:
            comparison = exp.NullSafeEQ
        else:
            comparison = exp.NullSafeNEQ
        return self.expression(comparison, this=this, expression=self._parse_bitwise())

    operand = self._parse_null() or self._parse_boolean()
    if operand:
        is_node = self.expression(exp.Is, this=this, expression=operand)
        if negated:
            return self.expression(exp.Not, this=is_node)
        return is_node

    self._retreat(is_token_index)
    return None
def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.Escape` when an ESCAPE token follows.

    Args:
        this: The expression the ESCAPE clause would apply to.

    Returns:
        An `exp.Escape` whose `expression` is the parsed string, or `this` unchanged
        when there is no ESCAPE token.
    """
    if self._match(TokenType.ESCAPE):
        escape_string = self._parse_string()
        return self.expression(exp.Escape, this=this, expression=escape_string)

    return this
def _parse_bitwise(self) -> t.Optional[exp.Expression]:
    """Parse a left-associative chain of bitwise-level binary operators over terms.

    Besides the operators registered in `self.BITWISE`, this level also handles:
      - `||` as string concatenation, when the dialect enables DPIPE_IS_STRING_CONCAT
      - `??` as a two-argument COALESCE
      - `<<` and `>>`, matched as pairs of LT / GT tokens

    Returns:
        The parsed expression, or the result of `_parse_term` if no operator follows.
    """
    this = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            this = self.expression(
                self.BITWISE[self._prev.token_type],
                this=this,
                expression=self._parse_term(),
            )
        elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
            this = self.expression(
                exp.DPipe,
                this=this,
                expression=self._parse_term(),
                safe=not self.dialect.STRICT_STRING_CONCAT,
            )
        elif self._match(TokenType.DQMARK):
            # `expressions` is a list-valued arg, so the single fallback operand must
            # be wrapped; passing a bare expression leaves the node malformed for
            # any consumer that iterates over `Coalesce.expressions`
            this = self.expression(
                exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
            )
        elif self._match_pair(TokenType.LT, TokenType.LT):
            this = self.expression(
                exp.BitwiseLeftShift, this=this, expression=self._parse_term()
            )
        elif self._match_pair(TokenType.GT, TokenType.GT):
            this = self.expression(
                exp.BitwiseRightShift, this=this, expression=self._parse_term()
            )
        else:
            break

    return this
4281 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4282 if isinstance(data_type, exp.Cast): 4283 # This constructor can contain ops directly after it, for instance struct unnesting: 4284 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4285 return self._parse_column_ops(data_type) 4286 4287 if data_type: 4288 index2 = self._index 4289 this = self._parse_primary() 4290 4291 if isinstance(this, exp.Literal): 4292 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4293 if parser: 4294 return parser(self, this, data_type) 4295 4296 return self.expression(exp.Cast, this=this, to=data_type) 4297 4298 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4299 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4300 # 4301 # If the index difference here is greater than 1, that means the parser itself must have 4302 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4303 # 4304 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4305 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4306 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4307 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4308 # 4309 # In these cases, we don't really want to return the converted type, but instead retreat 4310 # and try to parse a Column or Identifier in the section below. 
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a data type starting at the current token.

    Args:
        check_func: When True and the type was written with a parenthesized argument
            list, the type is only accepted if a string literal follows it; otherwise
            the parser retreats to where it started and None is returned (presumably so
            that a function call is not mistaken for a parameterized type).
        schema: When False, a trailing `[...]` that contains values is not consumed as
            part of the type; the parser retreats to just before it.
        allow_identifiers: Whether a VAR identifier may be interpreted as a type name
            (re-tokenized through the dialect) or as a user-defined type.

    Returns:
        Usually an `exp.DataType`. May also be an `exp.PseudoType`, an
        `exp.ObjectIdentifier`, or a cast of an inline `Struct`/`Array` when values
        directly follow a `<...>`-parameterized nested type. None if no type was parsed.
    """
    # Start position, used to undo everything when the input turns out not to be a type
    index = self._index

    this: t.Optional[exp.Expression] = None
    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )
        if isinstance(identifier, exp.Identifier):
            # Re-tokenize the identifier with the dialect's tokenizer to see whether it
            # maps to a known type token
            tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect))

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                # Pretend the type token was matched directly; the code below reads self._prev
                self._prev = tokens[0]
            elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                # Accumulate a dotted (qualified) user-defined type name
                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                this = exp.DataType.build(type_name, udt=True)
            else:
                # Not a type: un-consume the identifier
                self._retreat(self._index - 1)
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text.upper())

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

    # https://materialize.com/docs/sql/types/map/
    if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
        key_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.FARROW):
            self._retreat(index)
            return None

        value_type = self._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )
        if not self._match(TokenType.R_BRACKET):
            self._retreat(index)
            return None

        return exp.DataType(
            this=exp.DataType.Type.MAP,
            expressions=[key_type, value_type],
            nested=True,
            prefix=prefix,
        )

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
    expressions = None
    # Set once a parenthesized argument list was parsed, i.e. the input so far could
    # equally be read as a function call
    maybe_func = False

    if self._match(TokenType.L_PAREN):
        # Parenthesized type arguments; what they contain depends on the kind of type
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        elif is_aggregate:
            # First argument is a function (or identifier), followed by a comma and types
            func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            # NOTE(review): unlike the other failure paths, this one returns without
            # retreating to `index` - confirm callers handle the consumed tokens
            if not func_or_ident or not self._match(TokenType.COMMA):
                return None
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
            expressions.insert(0, func_or_ident)
        else:
            expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        maybe_func = True

    values: t.Optional[t.List[exp.Expression]] = None

    if nested and self._match(TokenType.LT):
        # Angle-bracket parameterization, e.g. ARRAY<INT> or STRUCT<a INT>
        if is_struct:
            expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        # Inline constructor values directly after the type, e.g. STRUCT<a INT>(1)
        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_assignment)
            self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        # A time zone qualifier means this is definitely a type, not a function call
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        # INTERVAL optionally carries a unit, or a `<unit> TO <unit>` span
        unit = self._parse_var(upper=True)
        if unit:
            if self._match_text_seq("TO"):
                unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

            this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
        else:
            this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

    if maybe_func and check_func:
        # Peek for a string literal without consuming it; if there is none, give up on
        # the type interpretation entirely
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            # Falls back to the signed token if raise_error did not actually raise
            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            prefix=prefix,
        )

        # Empty arrays/structs are allowed
        if values is not None:
            cls = exp.Struct if is_struct else exp.Array
            this = exp.cast(cls(expressions=values), this, copy=False)

    elif expressions:
        this.set("expressions", expressions)

    # https://materialize.com/docs/sql/types/list/#type-name
    while self._match(TokenType.LIST):
        this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

    # From here on, `index` marks the position right after the base type, so the array
    # suffix handling below can be undone on its own
    index = self._index

    # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
    matched_array = self._match(TokenType.ARRAY)

    # Each iteration wraps the type in one more ARRAY level (e.g. INT[][])
    while self._curr:
        matched_l_bracket = self._match(TokenType.L_BRACKET)
        if not matched_l_bracket and not matched_array:
            break

        matched_array = False
        values = self._parse_csv(self._parse_assignment) or None
        if values and not schema:
            # Bracket contents outside of a schema context are not treated as part of
            # the type; undo the suffix parsing
            self._retreat(index)
            break

        this = exp.DataType(
            this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
        )
        self._match(TokenType.R_BRACKET)

    # Give the parser's TYPE_CONVERTERS a chance to rewrite the resulting type
    if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
        converter = self.TYPE_CONVERTERS.get(this.this)
        if converter:
            this = converter(t.cast(exp.DataType, this))

    return this
def _parse_column(self) -> t.Optional[exp.Expression]:
    """Parse a column reference followed by any column operators.

    When no reference can be parsed, a bracket expression is attempted instead. For
    dialects with SUPPORTS_COLUMN_JOIN_MARKS, the result's `join_mark` arg records
    whether a JOIN_MARKER token followed the column.

    Returns:
        The parsed column expression, or a falsy value if nothing was parsed.
    """
    reference = self._parse_column_reference()

    if reference:
        column = self._parse_column_ops(reference)
    else:
        column = self._parse_bracket(reference)

    if not (self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column):
        return column

    has_join_mark = self._match(TokenType.JOIN_MARKER)
    column.set("join_mark", has_join_mark)
    return column
def _parse_colon_as_variant_extract(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Parse `:`-separated VARIANT path segments following `this` into a JSONExtract.

    Every `:<path>` segment is collected as raw SQL text, the segments are joined with
    dots and converted by the dialect's `to_json_path`. Casts found on a segment are
    stripped from the path and re-applied around the resulting extraction.

    Args:
        this: The expression the extraction is applied to.

    Returns:
        The (possibly cast-wrapped) `exp.JSONExtract`, or `this` unchanged when no
        path segment was parsed.
    """
    pending_casts = []
    segments = []

    while self._match(TokenType.COLON):
        segment_start = self._index

        # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
        key = self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
        path = self._parse_column_ops(key)

        # The cast :: operator has a lower precedence than the extraction operator :, so
        # we rearrange the AST appropriately to avoid casting the JSON path
        while isinstance(path, exp.Cast):
            pending_casts.append(path.to)
            path = path.this

        if pending_casts:
            # The segment's text ends at the token right before the first :: operator
            dcolon_index = next(
                position
                for position in range(segment_start, len(self._tokens))
                if self._tokens[position].token_type == TokenType.DCOLON
            )
            last_token = self._tokens[dcolon_index - 1]
        else:
            last_token = self._prev

        if path:
            segments.append(self._find_sql(self._tokens[segment_start], last_token))

    if not segments:
        return this

    # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
    # Databricks transforms it back to the colon/dot notation
    joined_path = exp.Literal.string(".".join(segments))
    this = self.expression(
        exp.JSONExtract,
        this=this,
        expression=self.dialect.to_json_path(joined_path),
        variant_extract=True,
    )

    # Re-apply the casts innermost-first, i.e. in reverse order of collection
    for target_type in reversed(pending_casts):
        this = self.expression(exp.Cast, this=this, to=target_type)

    return this
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression.

    Three forms are handled, in this order:
      1. A token with an entry in `self.PRIMARY_PARSERS`; consecutive string tokens
         are folded into a single `exp.Concat`.
      2. A number written with a leading dot (`.5`), normalized to `0.5`.
      3. A parenthesized query, expression list or single expression.

    Returns:
        The parsed expression, or None if the current token starts none of the above.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        matched_token = self._prev
        matched_type = matched_token.token_type
        primary = self.PRIMARY_PARSERS[matched_type](self, matched_token)

        if matched_type != TokenType.STRING:
            return primary

        # Adjacent string literals are combined into one concatenation
        pieces = [primary]
        while self._match(TokenType.STRING):
            pieces.append(exp.Literal.string(self._prev.text))

        if len(pieces) == 1:
            return primary
        return self.expression(exp.Concat, expressions=pieces)

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    comments = self._prev_comments
    query = self._parse_select()
    expressions = [query] if query else self._parse_expressions()

    this = self._parse_query_modifiers(seq_get(expressions, 0))

    if not this and self._match(TokenType.R_PAREN, advance=False):
        # "()" is an empty tuple
        this = self.expression(exp.Tuple)
    elif isinstance(this, exp.UNWRAPPED_QUERIES):
        this = self._parse_subquery(this=this, parse_alias=False)
    elif isinstance(this, exp.Subquery):
        with_set_operations = self._parse_set_operations(this)
        this = self._parse_subquery(this=with_set_operations, parse_alias=False)
    elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
        # Several items, or a single item with a trailing comma, form a tuple
        this = self.expression(exp.Tuple, expressions=expressions)
    else:
        this = self.expression(exp.Paren, this=this)

    if this:
        this.add_comments(comments)

    self._match_r_paren(expression=this)
    return this
def _parse_function_call(
    self,
    functions: t.Optional[t.Dict[str, t.Callable]] = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a function call starting at the current token.

    Args:
        functions: Mapping from upper-cased function name to a builder callable.
            Defaults to `self.FUNCTIONS`.
        anonymous: When True, dedicated function parsers and builders are bypassed
            and the call is parsed as an `exp.Anonymous` node.
        optional_parens: Whether functions that can be written without parentheses
            (`NO_PAREN_FUNCTION_PARSERS` / `NO_PAREN_FUNCTIONS`) are considered.
        any_token: When True, any token type except those in `RESERVED_TOKENS` may
            name a function; otherwise the token type must be in `FUNC_TOKENS`.

    Returns:
        The parsed function expression (passed through `_parse_window`, except for
        no-paren functions from `NO_PAREN_FUNCTIONS` and subquery predicates), or
        None if the current position does not start a function call.
    """
    if not self._curr:
        return None

    # Snapshot the name token's data up front; the parser advances past it below
    comments = self._curr.comments
    token_type = self._curr.token_type
    this = self._curr.text
    upper = this.upper()

    # Functions with a dedicated parser that do not require parentheses
    parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
    if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
        self._advance()
        return self._parse_window(parser(self))

    # Without a following "(", only token types registered as no-paren functions qualify
    if not self._next or self._next.token_type != TokenType.L_PAREN:
        if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
            self._advance()
            return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

        return None

    if any_token:
        if token_type in self.RESERVED_TOKENS:
            return None
    elif token_type not in self.FUNC_TOKENS:
        return None

    # Skip the function name and the opening parenthesis
    self._advance(2)

    parser = self.FUNCTION_PARSERS.get(upper)
    if parser and not anonymous:
        this = parser(self)
    else:
        subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

        # A subquery predicate applied directly to a query: its argument is the SELECT itself
        if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
            this = self.expression(subquery_predicate, this=self._parse_select())
            self._match_r_paren()
            return this

        if functions is None:
            functions = self.FUNCTIONS

        function = functions.get(upper)

        alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
        args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

        if alias:
            # Normalize aliased / key-value style arguments into PropertyEQ nodes
            args = self._kv_to_prop_eq(args)

        if function and not anonymous:
            # Builders that declare a `dialect` variable receive the current dialect
            if "dialect" in function.__code__.co_varnames:
                func = function(args, dialect=self.dialect)
            else:
                func = function(args)

            func = self.validate_expression(func, args)
            if not self.dialect.NORMALIZE_FUNCTIONS:
                # Keep the function name exactly as it was written in the input
                func.meta["name"] = this

            this = func
        else:
            if token_type == TokenType.IDENTIFIER:
                this = exp.Identifier(this=this, quoted=True)
            this = self.expression(exp.Anonymous, this=this, expressions=args)

    if isinstance(this, exp.Expression):
        this.add_comments(comments)

    self._match_r_paren(this)
    return self._parse_window(this)
def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
    """Build an Introducer from `token` and the primary that follows it.

    When no primary can be parsed, `token` is returned as a plain Identifier.
    """
    value = self._parse_primary()
    if not value:
        return self.expression(exp.Identifier, this=token.text)

    return self.expression(exp.Introducer, this=token.text, expression=value)


def _parse_session_parameter(self) -> exp.SessionParameter:
    """Parse a session parameter, optionally written as `<kind>.<name>`."""
    target = self._parse_id_var() or self._parse_primary()

    if not (target and self._match(TokenType.DOT)):
        return self.expression(exp.SessionParameter, this=target, kind=None)

    # The part before the dot is the kind; the actual parameter follows the dot
    scope = target.name
    target = self._parse_var() or self._parse_primary()
    return self.expression(exp.SessionParameter, this=target, kind=scope)


def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
    """Parse a single lambda parameter."""
    param = self._parse_id_var()
    return param


def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a lambda, falling back to a regular function argument.

    The parameter list is parsed speculatively; if no token from `self.LAMBDAS`
    follows it, the parser rewinds and the input is parsed as an ordinary
    argument, including its optional trailing modifiers.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        params = [self._parse_lambda_arg()]
    else:
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
        )

        if not self._match(TokenType.R_PAREN):
            self._retreat(start)

    if self._match_set(self.LAMBDAS):
        build = self.LAMBDAS[self._prev.token_type]
        return build(self, params)

    # Not a lambda after all: rewind and parse a plain argument
    self._retreat(start)

    node: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        node = self.expression(
            exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
        )
    else:
        node = self._parse_select_or_expression(alias=alias)

    # Modifiers are applied innermost first
    node = self._parse_respect_or_ignore_nulls(node)
    node = self._parse_having_max(node)
    node = self._parse_order(node)
    return self._parse_limit(node)
def _parse_field_def(self) -> t.Optional[exp.Expression]:
    """Parse a field (any token is accepted as its name) as a column definition."""
    field = self._parse_field(any_token=True)
    return self._parse_column_def(field)


def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse the type and constraints that follow a column name.

    Returns `this` unchanged when neither a type nor any constraint is found,
    otherwise an `exp.ColumnDef`.
    """
    # column defs are not really columns, they're identifiers
    if isinstance(this, exp.Column):
        this = this.this

    kind = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=this, ordinality=True)

    constraints: t.List[exp.Expression] = []

    if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    ):
        # Read the keyword before parsing further, since `_prev` moves on
        persisted = self._prev.text.upper() == "MATERIALIZED"
        computed = self._parse_assignment()
        persisted = persisted or self._match_text_seq("PERSISTED")
        not_null = self._match_pair(TokenType.NOT, TokenType.NULL)
        constraints.append(
            self.expression(
                exp.ComputedColumnConstraint,
                this=computed,
                persisted=persisted,
                not_null=not_null,
            )
        )
    elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
        self._match(TokenType.ALIAS)
        transform = self._parse_field()
        constraints.append(self.expression(exp.TransformColumnConstraint, this=transform))

    # Collect column constraints until none is left
    constraint = self._parse_column_constraint()
    while constraint:
        constraints.append(constraint)
        constraint = self._parse_column_constraint()

    if kind or constraints:
        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    return this
def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
    """Parse `REFRESH <var>` into an AutoRefreshProperty.

    If REFRESH does not follow, the parser steps back one token and None is returned.
    """
    if self._match_text_seq("REFRESH"):
        mode = self._parse_var(upper=True)
        return self.expression(exp.AutoRefreshProperty, this=mode)

    self._retreat(self._index - 1)
    return None


def _parse_compress(self) -> exp.CompressColumnConstraint:
    """Parse a COMPRESS constraint given either a parenthesized list or a single value."""
    if self._match(TokenType.L_PAREN, advance=False):
        value = self._parse_wrapped_csv(self._parse_bitwise)
    else:
        value = self._parse_bitwise()

    return self.expression(exp.CompressColumnConstraint, this=value)
def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse an inline-length constraint; the LENGTH keyword is optional."""
    self._match_text_seq("LENGTH")
    length = self._parse_bitwise()
    return self.expression(exp.InlineLengthColumnConstraint, this=length)


def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
    """Parse the constraint following NOT: NULL, CASESPECIFIC or FOR REPLICATION.

    Returns None when none of the three follows.
    """
    if self._match_text_seq("NULL"):
        node_type, extra = exp.NotNullColumnConstraint, {}
    elif self._match_text_seq("CASESPECIFIC"):
        node_type, extra = exp.CaseSpecificColumnConstraint, {"not_": True}
    elif self._match_text_seq("FOR", "REPLICATION"):
        node_type, extra = exp.NotForReplicationColumnConstraint, {}
    else:
        return None

    return self.expression(node_type, **extra)
def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """Parse `CONSTRAINT <name> <constraints...>`, or one unnamed schema constraint."""
    if self._match(TokenType.CONSTRAINT):
        name = self._parse_id_var()
        members = self._parse_unnamed_constraints()
        return self.expression(exp.Constraint, this=name, expressions=members)

    return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)


def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
    """Collect consecutive unnamed constraints (or functions) until neither parses."""
    found = []

    item = self._parse_unnamed_constraint() or self._parse_function()
    while item:
        found.append(item)
        item = self._parse_unnamed_constraint() or self._parse_function()

    return found


def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """Parse a single unnamed constraint.

    Args:
        constraints: keywords accepted here; defaults to `self.CONSTRAINT_PARSERS`.

    Returns:
        The parsed constraint, or None if the current token is an identifier or
        is not one of the accepted keywords.
    """
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
        return None

    keyword = self._prev.text.upper()
    if keyword not in self.CONSTRAINT_PARSERS:
        self.raise_error(f"No parser found for schema constraint {keyword}.")

    parse = self.CONSTRAINT_PARSERS[keyword]
    return parse(self)


def _parse_unique_key(self) -> t.Optional[exp.Expression]:
    """Parse the optional name that may follow UNIQUE [KEY]."""
    name = self._parse_id_var(any_token=False)
    return name


def _parse_unique(self) -> exp.UniqueColumnConstraint:
    """Parse the remainder of a UNIQUE constraint."""
    self._match_text_seq("KEY")

    # Parsed in source order; every step consumes tokens
    nulls = self._match_text_seq("NULLS", "NOT", "DISTINCT")
    columns = self._parse_schema(self._parse_unique_key())
    index_type = self._match(TokenType.USING) and self._advance_any() and self._prev.text
    on_conflict = self._parse_on_conflict()

    return self.expression(
        exp.UniqueColumnConstraint,
        nulls=nulls,
        this=columns,
        index_type=index_type,
        on_conflict=on_conflict,
    )
def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """Parse a REFERENCES clause: the referenced table followed by key options.

    Args:
        match: when True the REFERENCES keyword must be present, otherwise None
            is returned; when False the keyword is not looked for.
    """
    if match and not self._match(TokenType.REFERENCES):
        return None

    table = self._parse_table(schema=True)
    key_options = self._parse_key_constraint_options()
    return self.expression(exp.Reference, this=table, expressions=None, options=key_options)
def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parse one element of a PRIMARY KEY column list."""
    part = self._parse_field()
    return part


def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
    """Parse the wrapped pair of identifiers of a PERIOD FOR SYSTEM_TIME constraint.

    If the expected token is missing, the parser steps back one token and None is returned.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        bounds = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(bounds, 0),
            expression=seq_get(bounds, 1),
        )

    self._retreat(self._index - 1)
    return None


def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """Parse a PRIMARY KEY in column-constraint or table-level form.

    Without a following parenthesis (and outside of properties) this is a column
    constraint; otherwise the column list and the key options are parsed.
    """
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    if not (in_props or self._match(TokenType.L_PAREN, advance=False)):
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    parts = self._parse_wrapped_csv(self._parse_primary_key_part, optional=wrapped_optional)
    key_options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=parts, options=key_options)


def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
    """Parse one bracket entry: an assignment, an optional explicit alias, then a slice."""
    entry = self._parse_assignment()
    entry = self._parse_alias(entry, explicit=True)
    return self._parse_slice(entry)
def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in a Slice when a colon follows; otherwise return it unchanged."""
    if not self._match(TokenType.COLON):
        return this

    upper = self._parse_assignment()
    return self.expression(exp.Slice, this=this, expression=upper)
def _parse_if(self) -> t.Optional[exp.Expression]:
    """Parse IF in function form `IF(...)` or in `IF cond [THEN] x [ELSE y] [END]` form."""
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(self._parse_assignment)
        node = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
        return node

    # Index of the IF token itself, used to rewind if no condition follows
    start = self._index - 1

    if self.NO_PAREN_IF_COMMANDS and start == 0:
        return self._parse_as_command(self._prev)

    condition = self._parse_assignment()
    if not condition:
        self._retreat(start)
        return None

    self._match(TokenType.THEN)
    when_true = self._parse_assignment()

    when_false = None
    if self._match(TokenType.ELSE):
        when_false = self._parse_assignment()

    self._match(TokenType.END)
    return self.expression(exp.If, this=condition, true=when_true, false=when_false)


def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """Parse `VALUE FOR <column> [OVER (<order>)]`.

    If VALUE FOR does not follow, the parser steps back one token and None is returned.
    """
    if self._match_text_seq("VALUE", "FOR"):
        sequence = self._parse_column()
        order = self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order)
        return self.expression(exp.NextValueFor, this=sequence, order=order)

    self._retreat(self._index - 1)
    return None


def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of EXTRACT, separated by either FROM or a comma."""
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    if not self._match(TokenType.FROM) and not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    source = self._parse_bitwise()
    return self.expression(exp.Extract, this=part, expression=source)


def _parse_gap_fill(self) -> exp.GapFill:
    """Parse GAP_FILL arguments: an optional TABLE keyword, a table, then the rest."""
    self._match(TokenType.TABLE)
    args = [self._parse_table()]

    self._match(TokenType.COMMA)
    args.extend(self._parse_csv(self._parse_lambda))

    return self.validate_expression(exp.GapFill.from_arg_list(args), args)
_parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5426 this = self._parse_assignment() 5427 5428 if not self._match(TokenType.ALIAS): 5429 if self._match(TokenType.COMMA): 5430 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5431 5432 self.raise_error("Expected AS after CAST") 5433 5434 fmt = None 5435 to = self._parse_types() 5436 5437 if self._match(TokenType.FORMAT): 5438 fmt_string = self._parse_string() 5439 fmt = self._parse_at_time_zone(fmt_string) 5440 5441 if not to: 5442 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5443 if to.this in exp.DataType.TEMPORAL_TYPES: 5444 this = self.expression( 5445 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5446 this=this, 5447 format=exp.Literal.string( 5448 format_time( 5449 fmt_string.this if fmt_string else "", 5450 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5451 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5452 ) 5453 ), 5454 safe=safe, 5455 ) 5456 5457 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5458 this.set("zone", fmt.args["zone"]) 5459 return this 5460 elif not to: 5461 self.raise_error("Expected TYPE after CAST") 5462 elif isinstance(to, exp.Identifier): 5463 to = exp.DataType.build(to.name, udt=True) 5464 elif to.this == exp.DataType.Type.CHAR: 5465 if self._match(TokenType.CHARACTER_SET): 5466 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5467 5468 return self.expression( 5469 exp.Cast if strict else exp.TryCast, 5470 this=this, 5471 to=to, 5472 format=fmt, 5473 safe=safe, 5474 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5475 ) 5476 5477 def _parse_string_agg(self) -> exp.Expression: 5478 if self._match(TokenType.DISTINCT): 5479 args: t.List[t.Optional[exp.Expression]] = [ 5480 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5481 ] 5482 if self._match(TokenType.COMMA): 5483 
def _parse_convert(
    self, strict: bool, safe: t.Optional[bool] = None
) -> t.Optional[exp.Expression]:
    """Parse `CONVERT(expr USING charset)` or `CONVERT(expr, type)` into a cast.

    Args:
        strict: build `exp.Cast` when True, `exp.TryCast` otherwise.
        safe: forwarded to the resulting cast node.
    """
    value = self._parse_bitwise()

    target: t.Optional[exp.Expression] = None
    if self._match(TokenType.USING):
        target = self.expression(exp.CharacterSet, this=self._parse_var())
    elif self._match(TokenType.COMMA):
        target = self._parse_types()

    cast_type = exp.Cast if strict else exp.TryCast
    return self.expression(cast_type, this=value, to=target, safe=safe)
def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
    """Parse one `[KEY] <key> <separator> [VALUE] <value>` entry.

    Returns None when neither a key nor a value could be parsed.
    """
    self._match_text_seq("KEY")
    key = self._parse_column()

    self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
    self._match_text_seq("VALUE")
    value = self._parse_bitwise()

    if key or value:
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    return None
def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
    """Parse the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL).

    Each candidate in `values` is tried in order; the first that matches is
    returned as the string "<value> ON <on>", otherwise None.
    """
    return next(
        (f"{value} ON {on}" for value in values if self._match_text_seq(value, "ON", on)),
        None,
    )


@t.overload
def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...


@t.overload
def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...


def _parse_json_object(self, agg=False):
    """Parse the arguments of JSON_OBJECT, or of JSON_OBJECTAGG when `agg` is set."""
    star = self._parse_star()
    if star:
        entries = [star]
    else:
        entries = self._parse_csv(
            lambda: self._parse_format_json(self._parse_json_key_value())
        )

    null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

    # True / False for WITH / WITHOUT UNIQUE, None when neither is given
    if self._match_text_seq("WITH", "UNIQUE"):
        unique_keys = True
    elif self._match_text_seq("WITHOUT", "UNIQUE"):
        unique_keys = False
    else:
        unique_keys = None

    self._match_text_seq("KEYS")

    return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
        self._parse_type()
    )
    encoding = self._match_text_seq("ENCODING") and self._parse_var()

    node_type = exp.JSONObjectAgg if agg else exp.JSONObject
    return self.expression(
        node_type,
        expressions=entries,
        null_handling=null_handling,
        unique_keys=unique_keys,
        return_type=return_type,
        encoding=encoding,
    )
def _parse_json_schema(self) -> exp.JSONSchema:
    """Parse `[COLUMNS] (<column defs>)`; the parenthesized list is optional."""
    self._match_text_seq("COLUMNS")
    columns = self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
    return self.expression(exp.JSONSchema, expressions=columns)


def _parse_json_table(self) -> exp.JSONTable:
    """Parse the arguments of JSON_TABLE: input, path, ON ERROR / ON EMPTY, schema."""
    source = self._parse_format_json(self._parse_bitwise())
    json_path = self._match(TokenType.COMMA) and self._parse_string()
    on_error = self._parse_on_handling("ERROR", "ERROR", "NULL")
    on_empty = self._parse_on_handling("EMPTY", "ERROR", "NULL")
    columns = self._parse_json_schema()

    return exp.JSONTable(
        this=source,
        schema=columns,
        path=json_path,
        error_handling=on_error,
        empty_handling=on_empty,
    )


def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse `MATCH (<columns>) AGAINST (<string> [<search modifier>])`."""
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = f"{modifier} WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse `POSITION(needle IN haystack)` or the comma-separated form.

    Args:
        haystack_first: in the comma-separated form, whether the first argument
            is the string being searched rather than the substring.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        searched = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=searched, substr=seq_get(args, 0))

    haystack_idx, needle_idx = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_idx),
        substr=seq_get(args, needle_idx),
        position=seq_get(args, 2),
    )


def _parse_predict(self) -> exp.Predict:
    """Parse `[MODEL] <model>, [TABLE] <table> [, <params>]`."""
    self._match_text_seq("MODEL")
    model = self._parse_table()

    self._match(TokenType.COMMA)
    self._match_text_seq("TABLE")

    table = self._parse_table()
    params = self._match(TokenType.COMMA) and self._parse_bitwise()

    return self.expression(exp.Predict, this=model, expression=table, params_struct=params)


def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
    """Parse the table list of a join hint named `func_name` (stored upper-cased)."""
    tables = self._parse_csv(self._parse_table)
    return exp.JoinHint(this=func_name.upper(), expressions=tables)
def _parse_trim(self) -> exp.Trim:
    """Parse the arguments of TRIM, including the `<pattern> FROM <target>` form."""
    # https://www.w3resource.com/sql/character-functions/trim.php
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    position = self._prev.text.upper() if self._match_texts(self.TRIM_TYPES) else None

    target = self._parse_bitwise()
    pattern = None

    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        # Read the separator before parsing further, since `_prev` moves on
        swap = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
        pattern = self._parse_bitwise()

        if swap:
            target, pattern = pattern, target

    collation = self._parse_bitwise() if self._match(TokenType.COLLATE) else None

    return self.expression(
        exp.Trim, this=target, position=position, expression=pattern, collation=collation
    )


def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse `WINDOW <named window>, ...`; without WINDOW the match result is returned."""
    matched = self._match(TokenType.WINDOW)
    if not matched:
        return matched

    return self._parse_csv(self._parse_named_window)


def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parse one `<name> AS (<window spec>)` entry of a WINDOW clause."""
    name = self._parse_id_var()
    return self._parse_window(name, alias=True)


def _parse_respect_or_ignore_nulls(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Wrap `this` in IgnoreNulls / RespectNulls if the matching keywords follow."""
    if self._match_text_seq("IGNORE", "NULLS"):
        wrapper = exp.IgnoreNulls
    elif self._match_text_seq("RESPECT", "NULLS"):
        wrapper = exp.RespectNulls
    else:
        return this

    return self.expression(wrapper, this=this)
self._match_texts(("MAX", "MIN")) 5814 max = self._prev.text.upper() != "MIN" 5815 return self.expression( 5816 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5817 ) 5818 5819 return this 5820 5821 def _parse_window( 5822 self, this: t.Optional[exp.Expression], alias: bool = False 5823 ) -> t.Optional[exp.Expression]: 5824 func = this 5825 comments = func.comments if isinstance(func, exp.Expression) else None 5826 5827 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5828 self._match(TokenType.WHERE) 5829 this = self.expression( 5830 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5831 ) 5832 self._match_r_paren() 5833 5834 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5835 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5836 if self._match_text_seq("WITHIN", "GROUP"): 5837 order = self._parse_wrapped(self._parse_order) 5838 this = self.expression(exp.WithinGroup, this=this, expression=order) 5839 5840 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5841 # Some dialects choose to implement and some do not. 5842 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5843 5844 # There is some code above in _parse_lambda that handles 5845 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5846 5847 # The below changes handle 5848 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
5849 5850 # Oracle allows both formats 5851 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5852 # and Snowflake chose to do the same for familiarity 5853 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5854 if isinstance(this, exp.AggFunc): 5855 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5856 5857 if ignore_respect and ignore_respect is not this: 5858 ignore_respect.replace(ignore_respect.this) 5859 this = self.expression(ignore_respect.__class__, this=this) 5860 5861 this = self._parse_respect_or_ignore_nulls(this) 5862 5863 # bigquery select from window x AS (partition by ...) 5864 if alias: 5865 over = None 5866 self._match(TokenType.ALIAS) 5867 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5868 return this 5869 else: 5870 over = self._prev.text.upper() 5871 5872 if comments and isinstance(func, exp.Expression): 5873 func.pop_comments() 5874 5875 if not self._match(TokenType.L_PAREN): 5876 return self.expression( 5877 exp.Window, 5878 comments=comments, 5879 this=this, 5880 alias=self._parse_id_var(False), 5881 over=over, 5882 ) 5883 5884 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5885 5886 first = self._match(TokenType.FIRST) 5887 if self._match_text_seq("LAST"): 5888 first = False 5889 5890 partition, order = self._parse_partition_and_order() 5891 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5892 5893 if kind: 5894 self._match(TokenType.BETWEEN) 5895 start = self._parse_window_spec() 5896 self._match(TokenType.AND) 5897 end = self._parse_window_spec() 5898 5899 spec = self.expression( 5900 exp.WindowSpec, 5901 kind=kind, 5902 start=start["value"], 5903 start_side=start["side"], 5904 end=end["value"], 5905 end_side=end["side"], 5906 ) 5907 else: 5908 spec = None 5909 5910 self._match_r_paren() 5911 5912 window = self.expression( 5913 exp.Window, 5914 comments=comments, 
5915 this=this, 5916 partition_by=partition, 5917 order=order, 5918 spec=spec, 5919 alias=window_alias, 5920 over=over, 5921 first=first, 5922 ) 5923 5924 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5925 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5926 return self._parse_window(window, alias=alias) 5927 5928 return window 5929 5930 def _parse_partition_and_order( 5931 self, 5932 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5933 return self._parse_partition_by(), self._parse_order() 5934 5935 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5936 self._match(TokenType.BETWEEN) 5937 5938 return { 5939 "value": ( 5940 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5941 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5942 or self._parse_bitwise() 5943 ), 5944 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5945 } 5946 5947 def _parse_alias( 5948 self, this: t.Optional[exp.Expression], explicit: bool = False 5949 ) -> t.Optional[exp.Expression]: 5950 any_token = self._match(TokenType.ALIAS) 5951 comments = self._prev_comments or [] 5952 5953 if explicit and not any_token: 5954 return this 5955 5956 if self._match(TokenType.L_PAREN): 5957 aliases = self.expression( 5958 exp.Aliases, 5959 comments=comments, 5960 this=this, 5961 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5962 ) 5963 self._match_r_paren(aliases) 5964 return aliases 5965 5966 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5967 self.STRING_ALIASES and self._parse_string_as_identifier() 5968 ) 5969 5970 if alias: 5971 comments.extend(alias.pop_comments()) 5972 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5973 column = this.this 5974 5975 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5976 if not this.comments and column and column.comments: 5977 this.comments = 
column.pop_comments() 5978 5979 return this 5980 5981 def _parse_id_var( 5982 self, 5983 any_token: bool = True, 5984 tokens: t.Optional[t.Collection[TokenType]] = None, 5985 ) -> t.Optional[exp.Expression]: 5986 expression = self._parse_identifier() 5987 if not expression and ( 5988 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5989 ): 5990 quoted = self._prev.token_type == TokenType.STRING 5991 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5992 5993 return expression 5994 5995 def _parse_string(self) -> t.Optional[exp.Expression]: 5996 if self._match_set(self.STRING_PARSERS): 5997 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5998 return self._parse_placeholder() 5999 6000 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6001 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6002 6003 def _parse_number(self) -> t.Optional[exp.Expression]: 6004 if self._match_set(self.NUMERIC_PARSERS): 6005 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6006 return self._parse_placeholder() 6007 6008 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6009 if self._match(TokenType.IDENTIFIER): 6010 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6011 return self._parse_placeholder() 6012 6013 def _parse_var( 6014 self, 6015 any_token: bool = False, 6016 tokens: t.Optional[t.Collection[TokenType]] = None, 6017 upper: bool = False, 6018 ) -> t.Optional[exp.Expression]: 6019 if ( 6020 (any_token and self._advance_any()) 6021 or self._match(TokenType.VAR) 6022 or (self._match_set(tokens) if tokens else False) 6023 ): 6024 return self.expression( 6025 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6026 ) 6027 return self._parse_placeholder() 6028 6029 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6030 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6031 self._advance() 6032 return self._prev 6033 return None 6034 6035 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6036 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6037 6038 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6039 return self._parse_primary() or self._parse_var(any_token=True) 6040 6041 def _parse_null(self) -> t.Optional[exp.Expression]: 6042 if self._match_set(self.NULL_TOKENS): 6043 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6044 return self._parse_placeholder() 6045 6046 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6047 if self._match(TokenType.TRUE): 6048 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6049 if self._match(TokenType.FALSE): 6050 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6051 return self._parse_placeholder() 6052 6053 def _parse_star(self) -> t.Optional[exp.Expression]: 6054 if self._match(TokenType.STAR): 6055 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6056 return self._parse_placeholder() 6057 6058 def _parse_parameter(self) -> exp.Parameter: 6059 this = self._parse_identifier() or self._parse_primary_or_var() 6060 return self.expression(exp.Parameter, this=this) 6061 6062 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6063 if self._match_set(self.PLACEHOLDER_PARSERS): 6064 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6065 if placeholder: 6066 return placeholder 6067 self._advance(-1) 6068 return None 6069 6070 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6071 if not self._match_texts(keywords): 6072 return None 6073 if self._match(TokenType.L_PAREN, advance=False): 6074 return self._parse_wrapped_csv(self._parse_expression) 6075 6076 expression = self._parse_expression() 6077 return [expression] if expression else None 
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[exp.Expression]:
    """Parse a `sep`-separated sequence of items.

    Args:
        parse_method: Zero-argument callable that parses a single item.
        sep: Token type that separates consecutive items.

    Returns:
        The results of `parse_method` that are not None, in parse order.
    """
    parse_result = parse_method()
    items = [parse_result] if parse_result is not None else []

    while self._match(sep):
        # The item parsed before the separator is handed to _add_comments
        # before the next item is parsed.
        self._add_comments(parse_result)
        parse_result = parse_method()
        if parse_result is not None:
            items.append(parse_result)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    """Parse a chain of operands joined by any of the token types in `expressions`.

    Args:
        parse_method: Zero-argument callable that parses one operand.
        expressions: Mapping from token type to the expression class to build.

    Returns:
        The first operand, wrapped once per matched token together with the
        operand that follows it (so the chain nests to the left).
    """
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
    """Parse a parenthesized, comma-separated list using `_parse_id_var`."""
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[exp.Expression]:
    """Parse a parenthesized `sep`-separated list of items.

    Combines `_parse_wrapped` and `_parse_csv`; `optional` is forwarded to
    `_parse_wrapped`.
    """
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    """Run `parse_method` between an opening and a closing parenthesis.

    Args:
        parse_method: Zero-argument callable that parses the wrapped content.
        optional: When False, a missing "(" is reported through `raise_error`.

    Returns:
        Whatever `parse_method` returns.
    """
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    parse_result = parse_method()
    # A closing parenthesis is only required when the opening one was consumed.
    if wrapped:
        self._match_r_paren()
    return parse_result

def _parse_expressions(self) -> t.List[exp.Expression]:
    """Parse a comma-separated list using `_parse_expression`."""
    return self._parse_csv(self._parse_expression)

def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a SELECT, falling back to an expression followed by set operations.

    Args:
        alias: Selects the fallback parser: `_parse_expression` when True,
            `_parse_assignment` otherwise.
    """
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_assignment()
    )

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parse a nested SELECT (no subquery alias), then set operations and query modifiers."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Parse the remainder of a transaction statement into `exp.Transaction`.

    An optional kind (one of `TRANSACTION_KIND`) and an optional
    TRANSACTION/WORK keyword are consumed first. Modes are then read as
    comma-separated groups of consecutive VAR tokens, each group joined with
    single spaces.
    """
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    while True:
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse the remainder of a COMMIT or ROLLBACK statement.

    The statement kind is taken from the previously consumed token. The
    savepoint is only attached to `exp.Rollback` and the chain flag only to
    `exp.Commit`; both clauses are consumed either way.
    """
    chain = None
    savepoint = None
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts(("TRANSACTION", "WORK"))

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        # AND [NO] CHAIN: chain is False when NO is present, True otherwise.
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_refresh(self) -> exp.Refresh:
    """Parse REFRESH [TABLE] followed by a string or a table reference."""
    self._match(TokenType.TABLE)
    return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse `ADD [COLUMN] <field definition> [FIRST | AFTER <column>]`.

    Returns:
        The parsed field definition with its "exists" and, when present,
        "position" args set; None when the next token is not ADD.
    """
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_field_def()

    if expression:
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse a DROP action, defaulting its "kind" arg to "COLUMN".

    Returns:
        The result of `_parse_drop` when a DROP token is matched (left
        untouched if it is an `exp.Command`); otherwise the falsy match result.
    """
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Parse a comma-separated list of partitions into `exp.DropPartition`."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the ADD action of ALTER TABLE: constraints or column definitions."""
    # One token before the current position.
    # NOTE(review): presumably the ADD keyword consumed by the caller - confirm.
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(
            lambda: self.expression(
                exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
            )
        )

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)
    return self._parse_wrapped_csv(self._parse_add_column, optional=True)

def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
    """Parse the ALTER action of ALTER TABLE.

    A keyword registered in `ALTER_ALTER_PARSERS` is dispatched to its parser.
    Otherwise the action is parsed as ALTER [COLUMN] <column> followed by one
    of: DROP DEFAULT, SET DEFAULT, COMMENT, DROP NOT NULL, SET NOT NULL, or
    [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...].
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            drop=True,
            allow_null=True,
        )
    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            allow_null=False,
        )
    # Fallback: a data type change; both keyword groups are optional.
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_assignment(),
    )

def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Parse ALL | EVEN | AUTO, or [KEY DISTKEY] <column>, into `exp.AlterDistStyle`."""
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

    self._match_text_seq("KEY", "DISTKEY")
    return self.expression(exp.AlterDistStyle, this=self._parse_column())

def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
    """Parse a sort key change: a parenthesized identifier list, or AUTO / NONE.

    Args:
        compound: Stored on the result; when truthy, a SORTKEY keyword is
            consumed first.
    """
    if compound:
        self._match_text_seq("SORTKEY")

    if self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
        )

    # NOTE(review): the match result is not checked, so when neither keyword is
    # present the previous token's text is used as-is - confirm this is intended.
    self._match_texts(("AUTO", "NONE"))
    return self.expression(
        exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
    )

def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
    """Parse the DROP action of ALTER TABLE: partitions or DROP clauses."""
    # One token before the current position, restored before parsing DROP clauses.
    # NOTE(review): presumably the DROP keyword consumed by the caller - confirm.
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
    """Parse the RENAME action of ALTER TABLE.

    Returns:
        `exp.RenameColumn` for `COLUMN <old> TO <new>` (None when the old
        column, the TO keyword or the new column is missing), otherwise
        `exp.RenameTable` for `[TO] <table>`.
    """
    if self._match(TokenType.COLUMN):
        exists = self._parse_exists()
        old_column = self._parse_column()
        to = self._match_text_seq("TO")
        new_column = self._parse_column()

        # _match_text_seq returns None (not False) on failure.
        if old_column is None or to is None or new_column is None:
            return None

        return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter_table_set(self) -> exp.AlterSet:
    """Parse the SET action of ALTER TABLE into `exp.AlterSet`.

    Exactly one branch runs, chosen by the upcoming keyword(s); each stores
    its parsed value under a different arg of the result. When no keyword
    matches, an optional SERDE field is read and the rest is parsed as
    properties.
    """
    alter_set = self.expression(exp.AlterSet)

    if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
        "TABLE", "PROPERTIES"
    ):
        alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
    elif self._match_text_seq("FILESTREAM_ON", advance=False):
        alter_set.set("expressions", [self._parse_assignment()])
    elif self._match_texts(("LOGGED", "UNLOGGED")):
        alter_set.set("option", exp.var(self._prev.text.upper()))
    elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
        alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
    elif self._match_text_seq("LOCATION"):
        alter_set.set("location", self._parse_field())
    elif self._match_text_seq("ACCESS", "METHOD"):
        alter_set.set("access_method", self._parse_field())
    elif self._match_text_seq("TABLESPACE"):
        alter_set.set("tablespace", self._parse_field())
    elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
        alter_set.set("file_format", [self._parse_field()])
    elif self._match_text_seq("STAGE_FILE_FORMAT"):
        alter_set.set("file_format", self._parse_wrapped_options())
    elif self._match_text_seq("STAGE_COPY_OPTIONS"):
        alter_set.set("copy_options", self._parse_wrapped_options())
    elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
        alter_set.set("tag", self._parse_csv(self._parse_assignment))
    else:
        if self._match_text_seq("SERDE"):
            alter_set.set("serde", self._parse_field())

        alter_set.set("expressions", [self._parse_properties()])

    return alter_set

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse an ALTER statement.

    Returns:
        `exp.AlterTable` when the statement is ALTER TABLE, the action keyword
        has an entry in `ALTER_PARSERS`, that parser produced actions and no
        tokens are left over. In every other case the whole statement is
        returned as an `exp.Command`.
    """
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)
    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # Step over the action keyword so that it becomes self._prev for the lookup below.
    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))
        options = self._parse_csv(self._parse_property)

        if not self._curr and actions:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
                only=only,
                options=options,
                cluster=cluster,
            )

    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Parse `[INTO] <target> [alias] [USING] <source> [ON] <condition> <WHEN clauses>`."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_assignment()

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=self._parse_when_matched(),
    )

def _parse_when_matched(self) -> t.List[exp.When]:
    """Parse consecutive `WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND ...] THEN ...` clauses.

    Returns:
        One `exp.When` per clause. Its `then` arg is an `exp.Insert`, an
        `exp.Update`, an `exp.Var` holding the DELETE token text, or None when
        no supported action follows.
    """
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # False for BY TARGET; otherwise the result of matching BY SOURCE.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_assignment() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match_text_seq("VALUES") and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )
    return whens

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Dispatch to a parser from `SHOW_PARSERS`, or fall back to an `exp.Command`."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse one `<name> [= | TO] <value>` item of a SET statement.

    Args:
        kind: Stored on the resulting `exp.SetItem`. "GLOBAL" and "SESSION"
            additionally enable the `TRANSACTION ...` form.

    Returns:
        An `exp.SetItem` wrapping an `exp.EQ`, or None (with the token
        position restored) when there is no left-hand side or a required
        delimiter is missing.
    """
    index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_column()
    assignment_delimiter = self._match_texts(("=", "TO"))

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    # Bare names on the right-hand side are stored as plain variables.
    if isinstance(right, (exp.Column, exp.Identifier)):
        right = exp.var(right.name)

    this = self.expression(exp.EQ, this=left, expression=right)
    return self.expression(exp.SetItem, this=this, kind=kind)

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse `[TRANSACTION] <characteristic>, ...` into an `exp.SetItem` of kind "TRANSACTION".

    Args:
        global_: Stored on the result under the "global" arg.
    """
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # "global" is a Python keyword, so it has to be passed through a dict.
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item with a parser from `SET_PARSERS`, else as a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a comma-separated list of SET items into `exp.Set`.

    If tokens remain afterwards, the position is restored and the statement
    is returned as an `exp.Command` instead.
    """
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> t.Optional[exp.Var]:
    """Parse an option keyword plus one of its allowed continuations.

    Args:
        options: Maps an upper-cased leading keyword to its allowed
            continuations, each a single word or a sequence of words. An empty
            sequence means the keyword is complete on its own.
        raise_unmatched: Whether an unknown keyword, or a keyword without a
            required continuation, is reported through `raise_error`.

    Returns:
        An `exp.Var` holding the space-joined option text, or None when there
        is no current token or nothing matched (the position is restored).
    """
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        # No continuation matched. This is only acceptable when the keyword is
        # known and has an empty continuation list.
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and return the statement as an `exp.Command`.

    The SQL text from `start` to the last token is split after
    `len(start.text)` characters: the head becomes `this`, the rest
    `expression`.
    """
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    self._warn_unsupported()
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse `(<kind> [(<key> <value> ...)])` into `exp.DictProperty`.

    Args:
        this: Stored as the property's `this` arg.
    """
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse `(MIN <min> MAX <max>)` or `(<max>)` into `exp.DictRange`.

    When MIN is absent, the minimum is the numeric literal 0.
    """
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _parse_comprehension(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Comprehension]:
    """Parse `<column> IN <column> [IF <condition>]` into `exp.Comprehension`.

    Args:
        this: Stored as the comprehension's `this` arg.

    Returns:
        The comprehension, or None when no IN token follows the first column.
    """
    index = self._index
    expression = self._parse_column()
    if not self._match(TokenType.IN):
        # Rewinds to one token before the starting position.
        # NOTE(review): presumably un-consumes a token matched by the caller - confirm.
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_assignment() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension,
        this=this,
        expression=expression,
        iterator=iterator,
        condition=condition,
    )

def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
    """Parse a heredoc: a HEREDOC_STRING token, or text delimited by `$$` / `$TAG$`.

    Returns:
        An `exp.Heredoc`, or None when the current token does not start one.
        A missing closing delimiter is reported through `raise_error`.
    """
    if self._match(TokenType.HEREDOC_STRING):
        return self.expression(exp.Heredoc, this=self._prev.text)

    if not self._match_text_seq("$"):
        return None

    tags = ["$"]
    tag_text = None

    if self._is_connected():
        self._advance()
        tags.append(self._prev.text.upper())
    else:
        self.raise_error("No closing $ found")

    # The token after the first "$" was not "$": it is a tag that has to be
    # followed by another "$".
    if tags[-1] != "$":
        if self._is_connected() and self._match_text_seq("$"):
            tag_text = tags[-1]
            tags.append("$")
        else:
            self.raise_error("No closing $ found")

    heredoc_start = self._curr

    while self._curr:
        if self._match_text_seq(*tags, advance=False):
            this = self._find_sql(heredoc_start, self._prev)
            self._advance(len(tags))
            return self.expression(exp.Heredoc, this=this, tag=tag_text)

        self._advance()

    self.raise_error(f"No closing {''.join(tags)} found")
    return None

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Find the parser registered for the keyword sequence at the current position.

    Args:
        parsers: Maps space-joined, upper-cased keyword sequences to parsers.
        trie: Trie over the keys of `parsers`, walked one token at a time.

    Returns:
        The matching parser, with its keywords consumed, or None with the
        token position restored.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    # Bounded by the token stream: the statement can end while the trie still
    # reports a prefix, and there is then no current token left to read.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Check whether the current token has type `token_type`.

    Args:
        token_type: The token type to compare against.
        advance: Whether to consume the token on a match.
        expression: Passed to `_add_comments` on a match.

    Returns:
        True on a match, None otherwise (including when no tokens are left).
    """
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Return True if the current token's type is in `types`, else None.

    The token is consumed on a match unless `advance` is False.
    """
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Return True if the current and next tokens have the given types, else None.

    Both tokens are consumed on a match unless `advance` is False.
    """
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a "(" token, reporting "Expecting (" through `raise_error` if absent."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a ")" token, reporting "Expecting )" through `raise_error` if absent."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Return True if the current token's upper-cased text is in `texts`, else None.

    The token is consumed on a match unless `advance` is False.
    """
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return None

def _match_text_seq(self, *texts, advance=True):
    """Match consecutive tokens against `texts`, compared with upper-cased token text.

    Returns:
        True when every text matches in order, None otherwise. The position
        is restored on failure, and also on success when `advance` is False.
    """
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return None

    if not advance:
        self._retreat(index)

    return True

def _replace_lambda(
    self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Replace columns in `node` that refer to lambda parameters.

    Args:
        node: The lambda body; returned unchanged when falsy.
        expressions: The lambda parameters. A parameter's "to" arg, when set,
            is used as a cast target.

    Returns:
        `node` with each column whose first part names a parameter replaced by
        its identifier or dot expression, wrapped in `exp.Cast` when the
        parameter has a "to" arg.
    """
    if not node:
        return node

    # False (rather than None) marks an untyped parameter, so that None can
    # mean "not a lambda parameter" in the lookup below.
    lambda_types = {e.name: e.args.get("to") or False for e in expressions}

    for column in node.find_all(exp.Column):
        typ = lambda_types.get(column.parts[0].name)
        if typ is not None:
            dot_or_id = column.to_dot() if column.table else column.this

            if typ:
                dot_or_id = self.expression(
                    exp.Cast,
                    this=dot_or_id,
                    to=typ,
                )

            parent = column.parent

            # Replace the outermost Dot of an enclosing Dot chain, if there is one.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                # No enclosing Dot: replace the column itself.
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
    """Parse a TRUNCATE statement, or a TRUNCATE(...) function call.

    Returns:
        The result of `_parse_function` when "(" follows directly, an
        `exp.Command` when tokens are left over, otherwise `exp.TruncateTable`.
    """
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        # Step back over the "(" and the token before it.
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable,
        expressions=expressions,
        is_database=is_database,
        exists=exists,
        cluster=cluster,
        identity=identity,
        option=option,
        partition=partition,
    )

def _parse_with_operator(self) -> t.Optional[exp.Expression]:
    """Parse an ordered opclass expression, optionally followed by `WITH <operator>`."""
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True)

    return self.expression(exp.WithOperator, this=this, op=op)

def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
    """Parse `[=] [(] <option> [,] ... )` into a list of properties.

    The "=", "(" and the commas between options are all optional. Parsing
    stops at ")" or when the tokens run out.
    """
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    opts: t.List[t.Optional[exp.Expression]] = []
    while self._curr and not self._match(TokenType.R_PAREN):
        if self._match_text_seq("FORMAT_NAME", "="):
            # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
            # so we parse it separately to use _parse_field()
            prop = self.expression(
                exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
            )
            opts.append(prop)
        else:
            opts.append(self._parse_property())

        self._match(TokenType.COMMA)

    return opts

def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
    """Parse COPY parameters up to, but not including, a closing parenthesis.

    Each parameter is a name, an optional "=" and/or alias token, and a
    value. Parameters are comma-separated only when the dialect sets
    `COPY_PARAMS_ARE_CSV`.
    """
    sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

    options = []
    while self._curr and not self._match(TokenType.R_PAREN, advance=False):
        option = self._parse_var(any_token=True)
        prev = self._prev.text.upper()

        # Different dialects might separate options and values by white space, "=" and "AS"
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        param = self.expression(exp.CopyParameter, this=option)

        if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
            TokenType.L_PAREN, advance=False
        ):
            # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
            param.set("expressions", self._parse_wrapped_options())
        elif prev == "FILE_FORMAT":
            # T-SQL's external file format case
            param.set("expression", self._parse_field())
        else:
            param.set("expression", self._parse_unquoted_field())

        options.append(param)
        self._match(sep)

    return options

def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    """Parse the optional credential clauses of a COPY statement.

    STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE and REGION are each
    checked once, in that order. The `exp.Credentials` node is returned even
    when none of them is present.
    """
    expr = self.expression(exp.Credentials)

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set("iam_role", self._parse_field())
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> t.Optional[exp.Expression]:
    """Parse one file location of a COPY statement; delegates to `_parse_field`."""
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    """Parse a COPY statement.

    Returns:
        `exp.Copy`, or an `exp.Command` covering the whole statement when
        tokens are left over.
    """
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    # True for FROM (or when neither keyword is present), False for TO.
    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy,
        this=this,
        kind=kind,
        credentials=credentials,
        files=files,
        params=params,
    )
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Build a map expression from a flat argument list.

    Args:
        args: Either a single star argument, or alternating keys and values
            laid out as `[key0, value0, key1, value1, ...]`.

    Returns:
        An `exp.StarMap` wrapping the argument when it is a lone star, otherwise an
        `exp.VarMap` whose `keys` and `values` are arrays built from the pairs.

    Raises:
        IndexError: If `args` has an odd length (a key without a matching value).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Walk the list two items at a time; indexing `pos + 1` is what surfaces a dangling key.
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]
    map_keys = [key for key, _ in pairs]
    map_values = [value for _, value in pairs]

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """
    Create a parser callback for a binary range-style operator.

    Args:
        expr_type: The expression class to instantiate for the operator.
        reverse_args: When true, the already-parsed operand becomes `expression` and the
            newly parsed operand becomes `this`.

    Returns:
        A callable taking the parser and the already-parsed operand. It parses the other
        operand with `_parse_bitwise`, builds the `expr_type` node through the parser's
        `expression` factory and hands the result to `_parse_escape`.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        operand = self._parse_bitwise()
        left, right = (operand, this) if reverse_args else (this, operand)
        node = self.expression(expr_type, this=left, expression=right)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """
    Build a logarithm expression from one or two arguments.

    Args:
        args: The function arguments; the default order is base, expression.
        dialect: The dialect whose `LOG_BASE_FIRST` flag and whose parser class'
            `LOG_DEFAULTS_TO_LN` flag decide how the arguments are interpreted.

    Returns:
        With two arguments, an `exp.Log` whose operands are swapped when the dialect does
        not put the base first. With a single argument, an `exp.Ln` if the dialect's parser
        treats a bare LOG as the natural logarithm, otherwise an `exp.Log`.
    """
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect's parser decides between LN and LOG.
        log_type = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_type(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    # The dialect lists the base last, so flip the operands into the default order.
    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """
    Create a builder for JSON extraction expressions that take a path argument.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A callable taking the argument list and the dialect. It uses the first argument as
        `this`, converts the second argument with `dialect.to_json_path` and, only for
        `exp.JSONExtract`, stores any remaining arguments under `expressions`.
    """

    def _builder(args: t.List, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        # Arguments past the path are kept only on JSONExtract nodes.
        remaining = args[2:]
        if remaining and expr_type is exp.JSONExtract:
            node.set("expressions", remaining)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """
    Build a modulo expression from the arguments of a MOD(...) call.

    Args:
        args: The function arguments; the first is the left operand, the second the right.

    Returns:
        An `exp.Mod` whose operands are wrapped in `exp.Paren` whenever they are binary
        nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7.
    """

    def _parenthesize(operand: t.Any) -> t.Any:
        # Binary operands need explicit parentheses once MOD is expressed as an operator.
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))

    return exp.Mod(this=dividend, expression=divisor)
131class Parser(metaclass=_Parser): 132 """ 133 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 134 135 Args: 136 error_level: The desired error level. 137 Default: ErrorLevel.IMMEDIATE 138 error_message_context: The amount of context to capture from a query string when displaying 139 the error message (in number of characters). 140 Default: 100 141 max_errors: Maximum number of error messages to include in a raised ParseError. 142 This is only relevant if error_level is ErrorLevel.RAISE. 143 Default: 3 144 """ 145 146 FUNCTIONS: t.Dict[str, t.Callable] = { 147 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 148 "CONCAT": lambda args, dialect: exp.Concat( 149 expressions=args, 150 safe=not dialect.STRICT_STRING_CONCAT, 151 coalesce=dialect.CONCAT_COALESCE, 152 ), 153 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 154 expressions=args, 155 safe=not dialect.STRICT_STRING_CONCAT, 156 coalesce=dialect.CONCAT_COALESCE, 157 ), 158 "DATE_TO_DATE_STR": lambda args: exp.Cast( 159 this=seq_get(args, 0), 160 to=exp.DataType(this=exp.DataType.Type.TEXT), 161 ), 162 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 163 "HEX": build_hex, 164 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 165 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 166 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 167 "LIKE": build_like, 168 "LOG": build_logarithm, 169 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 170 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 171 "LOWER": build_lower, 172 "LPAD": lambda args: build_pad(args), 173 "LEFTPAD": lambda args: build_pad(args), 174 "MOD": build_mod, 175 "RPAD": lambda args: build_pad(args, is_left=False), 176 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 177 "SCOPE_RESOLUTION": 
lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 178 if len(args) != 2 179 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 180 "TIME_TO_TIME_STR": lambda args: exp.Cast( 181 this=seq_get(args, 0), 182 to=exp.DataType(this=exp.DataType.Type.TEXT), 183 ), 184 "TO_HEX": build_hex, 185 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 186 this=exp.Cast( 187 this=seq_get(args, 0), 188 to=exp.DataType(this=exp.DataType.Type.TEXT), 189 ), 190 start=exp.Literal.number(1), 191 length=exp.Literal.number(10), 192 ), 193 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 194 "UPPER": build_upper, 195 "VAR_MAP": build_var_map, 196 } 197 198 NO_PAREN_FUNCTIONS = { 199 TokenType.CURRENT_DATE: exp.CurrentDate, 200 TokenType.CURRENT_DATETIME: exp.CurrentDate, 201 TokenType.CURRENT_TIME: exp.CurrentTime, 202 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 203 TokenType.CURRENT_USER: exp.CurrentUser, 204 } 205 206 STRUCT_TYPE_TOKENS = { 207 TokenType.NESTED, 208 TokenType.OBJECT, 209 TokenType.STRUCT, 210 } 211 212 NESTED_TYPE_TOKENS = { 213 TokenType.ARRAY, 214 TokenType.LIST, 215 TokenType.LOWCARDINALITY, 216 TokenType.MAP, 217 TokenType.NULLABLE, 218 *STRUCT_TYPE_TOKENS, 219 } 220 221 ENUM_TYPE_TOKENS = { 222 TokenType.ENUM, 223 TokenType.ENUM8, 224 TokenType.ENUM16, 225 } 226 227 AGGREGATE_TYPE_TOKENS = { 228 TokenType.AGGREGATEFUNCTION, 229 TokenType.SIMPLEAGGREGATEFUNCTION, 230 } 231 232 TYPE_TOKENS = { 233 TokenType.BIT, 234 TokenType.BOOLEAN, 235 TokenType.TINYINT, 236 TokenType.UTINYINT, 237 TokenType.SMALLINT, 238 TokenType.USMALLINT, 239 TokenType.INT, 240 TokenType.UINT, 241 TokenType.BIGINT, 242 TokenType.UBIGINT, 243 TokenType.INT128, 244 TokenType.UINT128, 245 TokenType.INT256, 246 TokenType.UINT256, 247 TokenType.MEDIUMINT, 248 TokenType.UMEDIUMINT, 249 TokenType.FIXEDSTRING, 250 TokenType.FLOAT, 251 TokenType.DOUBLE, 252 TokenType.CHAR, 253 TokenType.NCHAR, 254 TokenType.VARCHAR, 255 
TokenType.NVARCHAR, 256 TokenType.BPCHAR, 257 TokenType.TEXT, 258 TokenType.MEDIUMTEXT, 259 TokenType.LONGTEXT, 260 TokenType.MEDIUMBLOB, 261 TokenType.LONGBLOB, 262 TokenType.BINARY, 263 TokenType.VARBINARY, 264 TokenType.JSON, 265 TokenType.JSONB, 266 TokenType.INTERVAL, 267 TokenType.TINYBLOB, 268 TokenType.TINYTEXT, 269 TokenType.TIME, 270 TokenType.TIMETZ, 271 TokenType.TIMESTAMP, 272 TokenType.TIMESTAMP_S, 273 TokenType.TIMESTAMP_MS, 274 TokenType.TIMESTAMP_NS, 275 TokenType.TIMESTAMPTZ, 276 TokenType.TIMESTAMPLTZ, 277 TokenType.TIMESTAMPNTZ, 278 TokenType.DATETIME, 279 TokenType.DATETIME64, 280 TokenType.DATE, 281 TokenType.DATE32, 282 TokenType.INT4RANGE, 283 TokenType.INT4MULTIRANGE, 284 TokenType.INT8RANGE, 285 TokenType.INT8MULTIRANGE, 286 TokenType.NUMRANGE, 287 TokenType.NUMMULTIRANGE, 288 TokenType.TSRANGE, 289 TokenType.TSMULTIRANGE, 290 TokenType.TSTZRANGE, 291 TokenType.TSTZMULTIRANGE, 292 TokenType.DATERANGE, 293 TokenType.DATEMULTIRANGE, 294 TokenType.DECIMAL, 295 TokenType.UDECIMAL, 296 TokenType.BIGDECIMAL, 297 TokenType.UUID, 298 TokenType.GEOGRAPHY, 299 TokenType.GEOMETRY, 300 TokenType.HLLSKETCH, 301 TokenType.HSTORE, 302 TokenType.PSEUDO_TYPE, 303 TokenType.SUPER, 304 TokenType.SERIAL, 305 TokenType.SMALLSERIAL, 306 TokenType.BIGSERIAL, 307 TokenType.XML, 308 TokenType.YEAR, 309 TokenType.UNIQUEIDENTIFIER, 310 TokenType.USERDEFINED, 311 TokenType.MONEY, 312 TokenType.SMALLMONEY, 313 TokenType.ROWVERSION, 314 TokenType.IMAGE, 315 TokenType.VARIANT, 316 TokenType.VECTOR, 317 TokenType.OBJECT, 318 TokenType.OBJECT_IDENTIFIER, 319 TokenType.INET, 320 TokenType.IPADDRESS, 321 TokenType.IPPREFIX, 322 TokenType.IPV4, 323 TokenType.IPV6, 324 TokenType.UNKNOWN, 325 TokenType.NULL, 326 TokenType.NAME, 327 TokenType.TDIGEST, 328 *ENUM_TYPE_TOKENS, 329 *NESTED_TYPE_TOKENS, 330 *AGGREGATE_TYPE_TOKENS, 331 } 332 333 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 334 TokenType.BIGINT: TokenType.UBIGINT, 335 TokenType.INT: TokenType.UINT, 336 TokenType.MEDIUMINT: 
TokenType.UMEDIUMINT, 337 TokenType.SMALLINT: TokenType.USMALLINT, 338 TokenType.TINYINT: TokenType.UTINYINT, 339 TokenType.DECIMAL: TokenType.UDECIMAL, 340 } 341 342 SUBQUERY_PREDICATES = { 343 TokenType.ANY: exp.Any, 344 TokenType.ALL: exp.All, 345 TokenType.EXISTS: exp.Exists, 346 TokenType.SOME: exp.Any, 347 } 348 349 RESERVED_TOKENS = { 350 *Tokenizer.SINGLE_TOKENS.values(), 351 TokenType.SELECT, 352 } - {TokenType.IDENTIFIER} 353 354 DB_CREATABLES = { 355 TokenType.DATABASE, 356 TokenType.DICTIONARY, 357 TokenType.MODEL, 358 TokenType.SCHEMA, 359 TokenType.SEQUENCE, 360 TokenType.STORAGE_INTEGRATION, 361 TokenType.TABLE, 362 TokenType.TAG, 363 TokenType.VIEW, 364 TokenType.WAREHOUSE, 365 TokenType.STREAMLIT, 366 } 367 368 CREATABLES = { 369 TokenType.COLUMN, 370 TokenType.CONSTRAINT, 371 TokenType.FOREIGN_KEY, 372 TokenType.FUNCTION, 373 TokenType.INDEX, 374 TokenType.PROCEDURE, 375 *DB_CREATABLES, 376 } 377 378 # Tokens that can represent identifiers 379 ID_VAR_TOKENS = { 380 TokenType.VAR, 381 TokenType.ANTI, 382 TokenType.APPLY, 383 TokenType.ASC, 384 TokenType.ASOF, 385 TokenType.AUTO_INCREMENT, 386 TokenType.BEGIN, 387 TokenType.BPCHAR, 388 TokenType.CACHE, 389 TokenType.CASE, 390 TokenType.COLLATE, 391 TokenType.COMMAND, 392 TokenType.COMMENT, 393 TokenType.COMMIT, 394 TokenType.CONSTRAINT, 395 TokenType.COPY, 396 TokenType.DEFAULT, 397 TokenType.DELETE, 398 TokenType.DESC, 399 TokenType.DESCRIBE, 400 TokenType.DICTIONARY, 401 TokenType.DIV, 402 TokenType.END, 403 TokenType.EXECUTE, 404 TokenType.ESCAPE, 405 TokenType.FALSE, 406 TokenType.FIRST, 407 TokenType.FILTER, 408 TokenType.FINAL, 409 TokenType.FORMAT, 410 TokenType.FULL, 411 TokenType.IDENTIFIER, 412 TokenType.IS, 413 TokenType.ISNULL, 414 TokenType.INTERVAL, 415 TokenType.KEEP, 416 TokenType.KILL, 417 TokenType.LEFT, 418 TokenType.LOAD, 419 TokenType.MERGE, 420 TokenType.NATURAL, 421 TokenType.NEXT, 422 TokenType.OFFSET, 423 TokenType.OPERATOR, 424 TokenType.ORDINALITY, 425 TokenType.OVERLAPS, 
426 TokenType.OVERWRITE, 427 TokenType.PARTITION, 428 TokenType.PERCENT, 429 TokenType.PIVOT, 430 TokenType.PRAGMA, 431 TokenType.RANGE, 432 TokenType.RECURSIVE, 433 TokenType.REFERENCES, 434 TokenType.REFRESH, 435 TokenType.REPLACE, 436 TokenType.RIGHT, 437 TokenType.ROLLUP, 438 TokenType.ROW, 439 TokenType.ROWS, 440 TokenType.SEMI, 441 TokenType.SET, 442 TokenType.SETTINGS, 443 TokenType.SHOW, 444 TokenType.TEMPORARY, 445 TokenType.TOP, 446 TokenType.TRUE, 447 TokenType.TRUNCATE, 448 TokenType.UNIQUE, 449 TokenType.UNNEST, 450 TokenType.UNPIVOT, 451 TokenType.UPDATE, 452 TokenType.USE, 453 TokenType.VOLATILE, 454 TokenType.WINDOW, 455 *CREATABLES, 456 *SUBQUERY_PREDICATES, 457 *TYPE_TOKENS, 458 *NO_PAREN_FUNCTIONS, 459 } 460 461 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 462 463 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 464 TokenType.ANTI, 465 TokenType.APPLY, 466 TokenType.ASOF, 467 TokenType.FULL, 468 TokenType.LEFT, 469 TokenType.LOCK, 470 TokenType.NATURAL, 471 TokenType.OFFSET, 472 TokenType.RIGHT, 473 TokenType.SEMI, 474 TokenType.WINDOW, 475 } 476 477 ALIAS_TOKENS = ID_VAR_TOKENS 478 479 ARRAY_CONSTRUCTORS = { 480 "ARRAY": exp.Array, 481 "LIST": exp.List, 482 } 483 484 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 485 486 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 487 488 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 489 490 FUNC_TOKENS = { 491 TokenType.COLLATE, 492 TokenType.COMMAND, 493 TokenType.CURRENT_DATE, 494 TokenType.CURRENT_DATETIME, 495 TokenType.CURRENT_TIMESTAMP, 496 TokenType.CURRENT_TIME, 497 TokenType.CURRENT_USER, 498 TokenType.FILTER, 499 TokenType.FIRST, 500 TokenType.FORMAT, 501 TokenType.GLOB, 502 TokenType.IDENTIFIER, 503 TokenType.INDEX, 504 TokenType.ISNULL, 505 TokenType.ILIKE, 506 TokenType.INSERT, 507 TokenType.LIKE, 508 TokenType.MERGE, 509 TokenType.OFFSET, 510 TokenType.PRIMARY_KEY, 511 TokenType.RANGE, 512 TokenType.REPLACE, 513 TokenType.RLIKE, 514 TokenType.ROW, 515 TokenType.UNNEST, 516 
TokenType.VAR, 517 TokenType.LEFT, 518 TokenType.RIGHT, 519 TokenType.SEQUENCE, 520 TokenType.DATE, 521 TokenType.DATETIME, 522 TokenType.TABLE, 523 TokenType.TIMESTAMP, 524 TokenType.TIMESTAMPTZ, 525 TokenType.TRUNCATE, 526 TokenType.WINDOW, 527 TokenType.XOR, 528 *TYPE_TOKENS, 529 *SUBQUERY_PREDICATES, 530 } 531 532 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 533 TokenType.AND: exp.And, 534 } 535 536 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 537 TokenType.COLON_EQ: exp.PropertyEQ, 538 } 539 540 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 541 TokenType.OR: exp.Or, 542 } 543 544 EQUALITY = { 545 TokenType.EQ: exp.EQ, 546 TokenType.NEQ: exp.NEQ, 547 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 548 } 549 550 COMPARISON = { 551 TokenType.GT: exp.GT, 552 TokenType.GTE: exp.GTE, 553 TokenType.LT: exp.LT, 554 TokenType.LTE: exp.LTE, 555 } 556 557 BITWISE = { 558 TokenType.AMP: exp.BitwiseAnd, 559 TokenType.CARET: exp.BitwiseXor, 560 TokenType.PIPE: exp.BitwiseOr, 561 } 562 563 TERM = { 564 TokenType.DASH: exp.Sub, 565 TokenType.PLUS: exp.Add, 566 TokenType.MOD: exp.Mod, 567 TokenType.COLLATE: exp.Collate, 568 } 569 570 FACTOR = { 571 TokenType.DIV: exp.IntDiv, 572 TokenType.LR_ARROW: exp.Distance, 573 TokenType.SLASH: exp.Div, 574 TokenType.STAR: exp.Mul, 575 } 576 577 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 578 579 TIMES = { 580 TokenType.TIME, 581 TokenType.TIMETZ, 582 } 583 584 TIMESTAMPS = { 585 TokenType.TIMESTAMP, 586 TokenType.TIMESTAMPTZ, 587 TokenType.TIMESTAMPLTZ, 588 *TIMES, 589 } 590 591 SET_OPERATIONS = { 592 TokenType.UNION, 593 TokenType.INTERSECT, 594 TokenType.EXCEPT, 595 } 596 597 JOIN_METHODS = { 598 TokenType.ASOF, 599 TokenType.NATURAL, 600 TokenType.POSITIONAL, 601 } 602 603 JOIN_SIDES = { 604 TokenType.LEFT, 605 TokenType.RIGHT, 606 TokenType.FULL, 607 } 608 609 JOIN_KINDS = { 610 TokenType.ANTI, 611 TokenType.CROSS, 612 TokenType.INNER, 613 TokenType.OUTER, 614 TokenType.SEMI, 615 
TokenType.STRAIGHT_JOIN, 616 } 617 618 JOIN_HINTS: t.Set[str] = set() 619 620 LAMBDAS = { 621 TokenType.ARROW: lambda self, expressions: self.expression( 622 exp.Lambda, 623 this=self._replace_lambda( 624 self._parse_assignment(), 625 expressions, 626 ), 627 expressions=expressions, 628 ), 629 TokenType.FARROW: lambda self, expressions: self.expression( 630 exp.Kwarg, 631 this=exp.var(expressions[0].name), 632 expression=self._parse_assignment(), 633 ), 634 } 635 636 COLUMN_OPERATORS = { 637 TokenType.DOT: None, 638 TokenType.DCOLON: lambda self, this, to: self.expression( 639 exp.Cast if self.STRICT_CAST else exp.TryCast, 640 this=this, 641 to=to, 642 ), 643 TokenType.ARROW: lambda self, this, path: self.expression( 644 exp.JSONExtract, 645 this=this, 646 expression=self.dialect.to_json_path(path), 647 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 648 ), 649 TokenType.DARROW: lambda self, this, path: self.expression( 650 exp.JSONExtractScalar, 651 this=this, 652 expression=self.dialect.to_json_path(path), 653 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 654 ), 655 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 656 exp.JSONBExtract, 657 this=this, 658 expression=path, 659 ), 660 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 661 exp.JSONBExtractScalar, 662 this=this, 663 expression=path, 664 ), 665 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 666 exp.JSONBContains, 667 this=this, 668 expression=key, 669 ), 670 } 671 672 EXPRESSION_PARSERS = { 673 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 674 exp.Column: lambda self: self._parse_column(), 675 exp.Condition: lambda self: self._parse_assignment(), 676 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 677 exp.Expression: lambda self: self._parse_expression(), 678 exp.From: lambda self: self._parse_from(joins=True), 679 exp.Group: lambda self: self._parse_group(), 680 exp.Having: lambda self: 
self._parse_having(), 681 exp.Identifier: lambda self: self._parse_id_var(), 682 exp.Join: lambda self: self._parse_join(), 683 exp.Lambda: lambda self: self._parse_lambda(), 684 exp.Lateral: lambda self: self._parse_lateral(), 685 exp.Limit: lambda self: self._parse_limit(), 686 exp.Offset: lambda self: self._parse_offset(), 687 exp.Order: lambda self: self._parse_order(), 688 exp.Ordered: lambda self: self._parse_ordered(), 689 exp.Properties: lambda self: self._parse_properties(), 690 exp.Qualify: lambda self: self._parse_qualify(), 691 exp.Returning: lambda self: self._parse_returning(), 692 exp.Select: lambda self: self._parse_select(), 693 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 694 exp.Table: lambda self: self._parse_table_parts(), 695 exp.TableAlias: lambda self: self._parse_table_alias(), 696 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 697 exp.Where: lambda self: self._parse_where(), 698 exp.Window: lambda self: self._parse_named_window(), 699 exp.With: lambda self: self._parse_with(), 700 "JOIN_TYPE": lambda self: self._parse_join_parts(), 701 } 702 703 STATEMENT_PARSERS = { 704 TokenType.ALTER: lambda self: self._parse_alter(), 705 TokenType.BEGIN: lambda self: self._parse_transaction(), 706 TokenType.CACHE: lambda self: self._parse_cache(), 707 TokenType.COMMENT: lambda self: self._parse_comment(), 708 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 709 TokenType.COPY: lambda self: self._parse_copy(), 710 TokenType.CREATE: lambda self: self._parse_create(), 711 TokenType.DELETE: lambda self: self._parse_delete(), 712 TokenType.DESC: lambda self: self._parse_describe(), 713 TokenType.DESCRIBE: lambda self: self._parse_describe(), 714 TokenType.DROP: lambda self: self._parse_drop(), 715 TokenType.INSERT: lambda self: self._parse_insert(), 716 TokenType.KILL: lambda self: self._parse_kill(), 717 TokenType.LOAD: lambda self: self._parse_load(), 718 TokenType.MERGE: lambda self: 
self._parse_merge(), 719 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 720 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 721 TokenType.REFRESH: lambda self: self._parse_refresh(), 722 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 723 TokenType.SET: lambda self: self._parse_set(), 724 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 725 TokenType.UNCACHE: lambda self: self._parse_uncache(), 726 TokenType.UPDATE: lambda self: self._parse_update(), 727 TokenType.USE: lambda self: self.expression( 728 exp.Use, 729 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 730 this=self._parse_table(schema=False), 731 ), 732 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 733 } 734 735 UNARY_PARSERS = { 736 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 737 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 738 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 739 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 740 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 741 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 742 } 743 744 STRING_PARSERS = { 745 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 746 exp.RawString, this=token.text 747 ), 748 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 749 exp.National, this=token.text 750 ), 751 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 752 TokenType.STRING: lambda self, token: self.expression( 753 exp.Literal, this=token.text, is_string=True 754 ), 755 TokenType.UNICODE_STRING: lambda self, token: self.expression( 756 exp.UnicodeString, 757 this=token.text, 758 escape=self._match_text_seq("UESCAPE") and 
self._parse_string(), 759 ), 760 } 761 762 NUMERIC_PARSERS = { 763 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 764 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 765 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 766 TokenType.NUMBER: lambda self, token: self.expression( 767 exp.Literal, this=token.text, is_string=False 768 ), 769 } 770 771 PRIMARY_PARSERS = { 772 **STRING_PARSERS, 773 **NUMERIC_PARSERS, 774 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 775 TokenType.NULL: lambda self, _: self.expression(exp.Null), 776 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 777 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 778 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 779 TokenType.STAR: lambda self, _: self.expression( 780 exp.Star, 781 **{ 782 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 783 "replace": self._parse_star_op("REPLACE"), 784 "rename": self._parse_star_op("RENAME"), 785 }, 786 ), 787 } 788 789 PLACEHOLDER_PARSERS = { 790 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 791 TokenType.PARAMETER: lambda self: self._parse_parameter(), 792 TokenType.COLON: lambda self: ( 793 self.expression(exp.Placeholder, this=self._prev.text) 794 if self._match_set(self.ID_VAR_TOKENS) 795 else None 796 ), 797 } 798 799 RANGE_PARSERS = { 800 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 801 TokenType.GLOB: binary_range_parser(exp.Glob), 802 TokenType.ILIKE: binary_range_parser(exp.ILike), 803 TokenType.IN: lambda self, this: self._parse_in(this), 804 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 805 TokenType.IS: lambda self, this: self._parse_is(this), 806 TokenType.LIKE: binary_range_parser(exp.Like), 807 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 808 
TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 809 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 810 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 811 } 812 813 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 814 "ALLOWED_VALUES": lambda self: self.expression( 815 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 816 ), 817 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 818 "AUTO": lambda self: self._parse_auto_property(), 819 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 820 "BACKUP": lambda self: self.expression( 821 exp.BackupProperty, this=self._parse_var(any_token=True) 822 ), 823 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 824 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 825 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 826 "CHECKSUM": lambda self: self._parse_checksum(), 827 "CLUSTER BY": lambda self: self._parse_cluster(), 828 "CLUSTERED": lambda self: self._parse_clustered_by(), 829 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 830 exp.CollateProperty, **kwargs 831 ), 832 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 833 "CONTAINS": lambda self: self._parse_contains_property(), 834 "COPY": lambda self: self._parse_copy_property(), 835 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 836 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 837 "DEFINER": lambda self: self._parse_definer(), 838 "DETERMINISTIC": lambda self: self.expression( 839 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 840 ), 841 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 842 "DISTKEY": lambda self: self._parse_distkey(), 843 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 844 "ENGINE": lambda self: 
self._parse_property_assignment(exp.EngineProperty), 845 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 846 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 847 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 848 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 849 "FREESPACE": lambda self: self._parse_freespace(), 850 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 851 "HEAP": lambda self: self.expression(exp.HeapProperty), 852 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 853 "IMMUTABLE": lambda self: self.expression( 854 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 855 ), 856 "INHERITS": lambda self: self.expression( 857 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 858 ), 859 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 860 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 861 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 862 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 863 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 864 "LIKE": lambda self: self._parse_create_like(), 865 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 866 "LOCK": lambda self: self._parse_locking(), 867 "LOCKING": lambda self: self._parse_locking(), 868 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 869 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 870 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 871 "MODIFIES": lambda self: self._parse_modifies_property(), 872 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 873 "NO": lambda self: self._parse_no_property(), 874 "ON": lambda self: self._parse_on_property(), 875 "ORDER BY": lambda self: 
self._parse_order(skip_order_token=True), 876 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 877 "PARTITION": lambda self: self._parse_partitioned_of(), 878 "PARTITION BY": lambda self: self._parse_partitioned_by(), 879 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 880 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 881 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 882 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 883 "READS": lambda self: self._parse_reads_property(), 884 "REMOTE": lambda self: self._parse_remote_with_connection(), 885 "RETURNS": lambda self: self._parse_returns(), 886 "STRICT": lambda self: self.expression(exp.StrictProperty), 887 "ROW": lambda self: self._parse_row(), 888 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 889 "SAMPLE": lambda self: self.expression( 890 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 891 ), 892 "SECURE": lambda self: self.expression(exp.SecureProperty), 893 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 894 "SETTINGS": lambda self: self.expression( 895 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 896 ), 897 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 898 "SORTKEY": lambda self: self._parse_sortkey(), 899 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 900 "STABLE": lambda self: self.expression( 901 exp.StabilityProperty, this=exp.Literal.string("STABLE") 902 ), 903 "STORED": lambda self: self._parse_stored(), 904 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 905 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 906 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 907 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 908 "TO": lambda self: self._parse_to_table(), 909 "TRANSIENT": 
lambda self: self.expression(exp.TransientProperty), 910 "TRANSFORM": lambda self: self.expression( 911 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 912 ), 913 "TTL": lambda self: self._parse_ttl(), 914 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 915 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 916 "VOLATILE": lambda self: self._parse_volatile_property(), 917 "WITH": lambda self: self._parse_with_property(), 918 } 919 920 CONSTRAINT_PARSERS = { 921 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 922 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 923 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 924 "CHARACTER SET": lambda self: self.expression( 925 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 926 ), 927 "CHECK": lambda self: self.expression( 928 exp.CheckColumnConstraint, 929 this=self._parse_wrapped(self._parse_assignment), 930 enforced=self._match_text_seq("ENFORCED"), 931 ), 932 "COLLATE": lambda self: self.expression( 933 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 934 ), 935 "COMMENT": lambda self: self.expression( 936 exp.CommentColumnConstraint, this=self._parse_string() 937 ), 938 "COMPRESS": lambda self: self._parse_compress(), 939 "CLUSTERED": lambda self: self.expression( 940 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 941 ), 942 "NONCLUSTERED": lambda self: self.expression( 943 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 944 ), 945 "DEFAULT": lambda self: self.expression( 946 exp.DefaultColumnConstraint, this=self._parse_bitwise() 947 ), 948 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 949 "EPHEMERAL": lambda self: self.expression( 950 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 951 ), 952 "EXCLUDE": lambda 
self: self.expression( 953 exp.ExcludeColumnConstraint, this=self._parse_index_params() 954 ), 955 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 956 "FORMAT": lambda self: self.expression( 957 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 958 ), 959 "GENERATED": lambda self: self._parse_generated_as_identity(), 960 "IDENTITY": lambda self: self._parse_auto_increment(), 961 "INLINE": lambda self: self._parse_inline(), 962 "LIKE": lambda self: self._parse_create_like(), 963 "NOT": lambda self: self._parse_not_constraint(), 964 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 965 "ON": lambda self: ( 966 self._match(TokenType.UPDATE) 967 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 968 ) 969 or self.expression(exp.OnProperty, this=self._parse_id_var()), 970 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 971 "PERIOD": lambda self: self._parse_period_for_system_time(), 972 "PRIMARY KEY": lambda self: self._parse_primary_key(), 973 "REFERENCES": lambda self: self._parse_references(match=False), 974 "TITLE": lambda self: self.expression( 975 exp.TitleColumnConstraint, this=self._parse_var_or_string() 976 ), 977 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 978 "UNIQUE": lambda self: self._parse_unique(), 979 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 980 "WITH": lambda self: self.expression( 981 exp.Properties, expressions=self._parse_wrapped_properties() 982 ), 983 } 984 985 ALTER_PARSERS = { 986 "ADD": lambda self: self._parse_alter_table_add(), 987 "ALTER": lambda self: self._parse_alter_table_alter(), 988 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 989 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 990 "DROP": lambda self: self._parse_alter_table_drop(), 991 "RENAME": lambda self: 
self._parse_alter_table_rename(), 992 "SET": lambda self: self._parse_alter_table_set(), 993 } 994 995 ALTER_ALTER_PARSERS = { 996 "DISTKEY": lambda self: self._parse_alter_diststyle(), 997 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 998 "SORTKEY": lambda self: self._parse_alter_sortkey(), 999 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1000 } 1001 1002 SCHEMA_UNNAMED_CONSTRAINTS = { 1003 "CHECK", 1004 "EXCLUDE", 1005 "FOREIGN KEY", 1006 "LIKE", 1007 "PERIOD", 1008 "PRIMARY KEY", 1009 "UNIQUE", 1010 } 1011 1012 NO_PAREN_FUNCTION_PARSERS = { 1013 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1014 "CASE": lambda self: self._parse_case(), 1015 "CONNECT_BY_ROOT": lambda self: self.expression( 1016 exp.ConnectByRoot, this=self._parse_column() 1017 ), 1018 "IF": lambda self: self._parse_if(), 1019 "NEXT": lambda self: self._parse_next_value_for(), 1020 } 1021 1022 INVALID_FUNC_NAME_TOKENS = { 1023 TokenType.IDENTIFIER, 1024 TokenType.STRING, 1025 } 1026 1027 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1028 1029 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1030 1031 FUNCTION_PARSERS = { 1032 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1033 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1034 "DECODE": lambda self: self._parse_decode(), 1035 "EXTRACT": lambda self: self._parse_extract(), 1036 "GAP_FILL": lambda self: self._parse_gap_fill(), 1037 "JSON_OBJECT": lambda self: self._parse_json_object(), 1038 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1039 "JSON_TABLE": lambda self: self._parse_json_table(), 1040 "MATCH": lambda self: self._parse_match_against(), 1041 "OPENJSON": lambda self: self._parse_open_json(), 1042 "POSITION": lambda self: self._parse_position(), 1043 "PREDICT": lambda self: self._parse_predict(), 1044 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1045 "STRING_AGG": lambda self: 
self._parse_string_agg(), 1046 "SUBSTRING": lambda self: self._parse_substring(), 1047 "TRIM": lambda self: self._parse_trim(), 1048 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1049 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1050 } 1051 1052 QUERY_MODIFIER_PARSERS = { 1053 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1054 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1055 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1056 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1057 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1058 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1059 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1060 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1061 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1062 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1063 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1064 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1065 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1066 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1067 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1068 TokenType.CLUSTER_BY: lambda self: ( 1069 "cluster", 1070 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1071 ), 1072 TokenType.DISTRIBUTE_BY: lambda self: ( 1073 "distribute", 1074 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1075 ), 1076 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1077 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1078 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1079 } 1080 1081 SET_PARSERS = { 1082 "GLOBAL": 
lambda self: self._parse_set_item_assignment("GLOBAL"), 1083 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1084 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1085 "TRANSACTION": lambda self: self._parse_set_transaction(), 1086 } 1087 1088 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1089 1090 TYPE_LITERAL_PARSERS = { 1091 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1092 } 1093 1094 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1095 1096 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1097 1098 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1099 1100 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1101 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1102 "ISOLATION": ( 1103 ("LEVEL", "REPEATABLE", "READ"), 1104 ("LEVEL", "READ", "COMMITTED"), 1105 ("LEVEL", "READ", "UNCOMITTED"), 1106 ("LEVEL", "SERIALIZABLE"), 1107 ), 1108 "READ": ("WRITE", "ONLY"), 1109 } 1110 1111 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1112 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1113 ) 1114 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1115 1116 CREATE_SEQUENCE: OPTIONS_TYPE = { 1117 "SCALE": ("EXTEND", "NOEXTEND"), 1118 "SHARD": ("EXTEND", "NOEXTEND"), 1119 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1120 **dict.fromkeys( 1121 ( 1122 "SESSION", 1123 "GLOBAL", 1124 "KEEP", 1125 "NOKEEP", 1126 "ORDER", 1127 "NOORDER", 1128 "NOCACHE", 1129 "CYCLE", 1130 "NOCYCLE", 1131 "NOMINVALUE", 1132 "NOMAXVALUE", 1133 "NOSCALE", 1134 "NOSHARD", 1135 ), 1136 tuple(), 1137 ), 1138 } 1139 1140 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1141 1142 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1143 1144 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1145 1146 
SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1147 "TYPE": ("EVOLUTION",), 1148 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1149 } 1150 1151 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1152 1153 CLONE_KEYWORDS = {"CLONE", "COPY"} 1154 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1155 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1156 1157 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1158 1159 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1160 1161 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1162 1163 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1164 1165 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1166 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1167 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1168 1169 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1170 1171 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1172 1173 ADD_CONSTRAINT_TOKENS = { 1174 TokenType.CONSTRAINT, 1175 TokenType.FOREIGN_KEY, 1176 TokenType.INDEX, 1177 TokenType.KEY, 1178 TokenType.PRIMARY_KEY, 1179 TokenType.UNIQUE, 1180 } 1181 1182 DISTINCT_TOKENS = {TokenType.DISTINCT} 1183 1184 NULL_TOKENS = {TokenType.NULL} 1185 1186 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1187 1188 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1189 1190 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1191 1192 STRICT_CAST = True 1193 1194 PREFIXED_PIVOT_COLUMNS = False 1195 IDENTIFY_PIVOT_STRINGS = False 1196 1197 LOG_DEFAULTS_TO_LN = False 1198 1199 # Whether ADD is present for each column added by ALTER TABLE 1200 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1201 1202 # Whether the table sample clause expects CSV syntax 1203 TABLESAMPLE_CSV = False 1204 1205 # The default method used for 
table sampling 1206 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1207 1208 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1209 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1210 1211 # Whether the TRIM function expects the characters to trim as its first argument 1212 TRIM_PATTERN_FIRST = False 1213 1214 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1215 STRING_ALIASES = False 1216 1217 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1218 MODIFIERS_ATTACHED_TO_SET_OP = True 1219 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1220 1221 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1222 NO_PAREN_IF_COMMANDS = True 1223 1224 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1225 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1226 1227 # Whether the `:` operator is used to extract a value from a VARIANT column 1228 COLON_IS_VARIANT_EXTRACT = False 1229 1230 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1231 # If this is True and '(' is not found, the keyword will be treated as an identifier 1232 VALUES_FOLLOWED_BY_PAREN = True 1233 1234 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1235 SUPPORTS_IMPLICIT_UNNEST = False 1236 1237 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1238 INTERVAL_SPANS = True 1239 1240 # Whether a PARTITION clause can follow a table reference 1241 SUPPORTS_PARTITION_SELECTION = False 1242 1243 __slots__ = ( 1244 "error_level", 1245 "error_message_context", 1246 "max_errors", 1247 "dialect", 1248 "sql", 1249 "errors", 1250 "_tokens", 1251 "_index", 1252 "_curr", 1253 "_next", 1254 "_prev", 1255 "_prev_comments", 1256 ) 1257 1258 # Autofilled 1259 SHOW_TRIE: t.Dict = {} 1260 SET_TRIE: t.Dict = {} 1261 1262 def __init__( 1263 self, 1264 error_level: t.Optional[ErrorLevel] = None, 1265 error_message_context: int = 100, 1266 max_errors: int = 3, 1267 dialect: DialectType = None, 1268 ): 1269 from sqlglot.dialects import Dialect 1270 1271 self.error_level = error_level or ErrorLevel.IMMEDIATE 1272 self.error_message_context = error_message_context 1273 self.max_errors = max_errors 1274 self.dialect = Dialect.get_or_raise(dialect) 1275 self.reset() 1276 1277 def reset(self): 1278 self.sql = "" 1279 self.errors = [] 1280 self._tokens = [] 1281 self._index = 0 1282 self._curr = None 1283 self._next = None 1284 self._prev = None 1285 self._prev_comments = None 1286 1287 def parse( 1288 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1289 ) -> t.List[t.Optional[exp.Expression]]: 1290 """ 1291 Parses a list of tokens and returns a list of syntax trees, one tree 1292 per parsed SQL statement. 1293 1294 Args: 1295 raw_tokens: The list of tokens. 1296 sql: The original SQL string, used to produce helpful debug messages. 1297 1298 Returns: 1299 The list of the produced syntax trees. 
1300 """ 1301 return self._parse( 1302 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1303 ) 1304 1305 def parse_into( 1306 self, 1307 expression_types: exp.IntoType, 1308 raw_tokens: t.List[Token], 1309 sql: t.Optional[str] = None, 1310 ) -> t.List[t.Optional[exp.Expression]]: 1311 """ 1312 Parses a list of tokens into a given Expression type. If a collection of Expression 1313 types is given instead, this method will try to parse the token list into each one 1314 of them, stopping at the first for which the parsing succeeds. 1315 1316 Args: 1317 expression_types: The expression type(s) to try and parse the token list into. 1318 raw_tokens: The list of tokens. 1319 sql: The original SQL string, used to produce helpful debug messages. 1320 1321 Returns: 1322 The target Expression. 1323 """ 1324 errors = [] 1325 for expression_type in ensure_list(expression_types): 1326 parser = self.EXPRESSION_PARSERS.get(expression_type) 1327 if not parser: 1328 raise TypeError(f"No parser registered for {expression_type}") 1329 1330 try: 1331 return self._parse(parser, raw_tokens, sql) 1332 except ParseError as e: 1333 e.errors[0]["into_expression"] = expression_type 1334 errors.append(e) 1335 1336 raise ParseError( 1337 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1338 errors=merge_errors(errors), 1339 ) from errors[-1] 1340 1341 def _parse( 1342 self, 1343 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1344 raw_tokens: t.List[Token], 1345 sql: t.Optional[str] = None, 1346 ) -> t.List[t.Optional[exp.Expression]]: 1347 self.reset() 1348 self.sql = sql or "" 1349 1350 total = len(raw_tokens) 1351 chunks: t.List[t.List[Token]] = [[]] 1352 1353 for i, token in enumerate(raw_tokens): 1354 if token.token_type == TokenType.SEMICOLON: 1355 if token.comments: 1356 chunks.append([token]) 1357 1358 if i < total - 1: 1359 chunks.append([]) 1360 else: 1361 chunks[-1].append(token) 1362 1363 expressions = [] 1364 1365 for 
tokens in chunks: 1366 self._index = -1 1367 self._tokens = tokens 1368 self._advance() 1369 1370 expressions.append(parse_method(self)) 1371 1372 if self._index < len(self._tokens): 1373 self.raise_error("Invalid expression / Unexpected token") 1374 1375 self.check_errors() 1376 1377 return expressions 1378 1379 def check_errors(self) -> None: 1380 """Logs or raises any found errors, depending on the chosen error level setting.""" 1381 if self.error_level == ErrorLevel.WARN: 1382 for error in self.errors: 1383 logger.error(str(error)) 1384 elif self.error_level == ErrorLevel.RAISE and self.errors: 1385 raise ParseError( 1386 concat_messages(self.errors, self.max_errors), 1387 errors=merge_errors(self.errors), 1388 ) 1389 1390 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1391 """ 1392 Appends an error in the list of recorded errors or raises it, depending on the chosen 1393 error level setting. 1394 """ 1395 token = token or self._curr or self._prev or Token.string("") 1396 start = token.start 1397 end = token.end + 1 1398 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1399 highlight = self.sql[start:end] 1400 end_context = self.sql[end : end + self.error_message_context] 1401 1402 error = ParseError.new( 1403 f"{message}. Line {token.line}, Col: {token.col}.\n" 1404 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1405 description=message, 1406 line=token.line, 1407 col=token.col, 1408 start_context=start_context, 1409 highlight=highlight, 1410 end_context=end_context, 1411 ) 1412 1413 if self.error_level == ErrorLevel.IMMEDIATE: 1414 raise error 1415 1416 self.errors.append(error) 1417 1418 def expression( 1419 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1420 ) -> E: 1421 """ 1422 Creates a new, validated Expression. 1423 1424 Args: 1425 exp_class: The expression class to instantiate. 1426 comments: An optional list of comments to attach to the expression. 
1427 kwargs: The arguments to set for the expression along with their respective values. 1428 1429 Returns: 1430 The target expression. 1431 """ 1432 instance = exp_class(**kwargs) 1433 instance.add_comments(comments) if comments else self._add_comments(instance) 1434 return self.validate_expression(instance) 1435 1436 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1437 if expression and self._prev_comments: 1438 expression.add_comments(self._prev_comments) 1439 self._prev_comments = None 1440 1441 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1442 """ 1443 Validates an Expression, making sure that all its mandatory arguments are set. 1444 1445 Args: 1446 expression: The expression to validate. 1447 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1448 1449 Returns: 1450 The validated expression. 1451 """ 1452 if self.error_level != ErrorLevel.IGNORE: 1453 for error_message in expression.error_messages(args): 1454 self.raise_error(error_message) 1455 1456 return expression 1457 1458 def _find_sql(self, start: Token, end: Token) -> str: 1459 return self.sql[start.start : end.end + 1] 1460 1461 def _is_connected(self) -> bool: 1462 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1463 1464 def _advance(self, times: int = 1) -> None: 1465 self._index += times 1466 self._curr = seq_get(self._tokens, self._index) 1467 self._next = seq_get(self._tokens, self._index + 1) 1468 1469 if self._index > 0: 1470 self._prev = self._tokens[self._index - 1] 1471 self._prev_comments = self._prev.comments 1472 else: 1473 self._prev = None 1474 self._prev_comments = None 1475 1476 def _retreat(self, index: int) -> None: 1477 if index != self._index: 1478 self._advance(index - self._index) 1479 1480 def _warn_unsupported(self) -> None: 1481 if len(self._tokens) <= 1: 1482 return 1483 1484 # We use _find_sql because self.sql may comprise multiple chunks, 
and we're only 1485 # interested in emitting a warning for the one being currently processed. 1486 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1487 1488 logger.warning( 1489 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1490 ) 1491 1492 def _parse_command(self) -> exp.Command: 1493 self._warn_unsupported() 1494 return self.expression( 1495 exp.Command, 1496 comments=self._prev_comments, 1497 this=self._prev.text.upper(), 1498 expression=self._parse_string(), 1499 ) 1500 1501 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1502 """ 1503 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 1504 This behavior can be different depending on the uset-set ErrorLevel, so _try_parse aims to 1505 solve this by setting & resetting the parser state accordingly 1506 """ 1507 index = self._index 1508 error_level = self.error_level 1509 1510 self.error_level = ErrorLevel.IMMEDIATE 1511 try: 1512 this = parse_method() 1513 except ParseError: 1514 this = None 1515 finally: 1516 if not this or retreat: 1517 self._retreat(index) 1518 self.error_level = error_level 1519 1520 return this 1521 1522 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1523 start = self._prev 1524 exists = self._parse_exists() if allow_exists else None 1525 1526 self._match(TokenType.ON) 1527 1528 materialized = self._match_text_seq("MATERIALIZED") 1529 kind = self._match_set(self.CREATABLES) and self._prev 1530 if not kind: 1531 return self._parse_as_command(start) 1532 1533 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1534 this = self._parse_user_defined_function(kind=kind.token_type) 1535 elif kind.token_type == TokenType.TABLE: 1536 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1537 elif kind.token_type == TokenType.COLUMN: 1538 this = self._parse_column() 1539 else: 1540 this 
= self._parse_id_var() 1541 1542 self._match(TokenType.IS) 1543 1544 return self.expression( 1545 exp.Comment, 1546 this=this, 1547 kind=kind.text, 1548 expression=self._parse_string(), 1549 exists=exists, 1550 materialized=materialized, 1551 ) 1552 1553 def _parse_to_table( 1554 self, 1555 ) -> exp.ToTableProperty: 1556 table = self._parse_table_parts(schema=True) 1557 return self.expression(exp.ToTableProperty, this=table) 1558 1559 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1560 def _parse_ttl(self) -> exp.Expression: 1561 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1562 this = self._parse_bitwise() 1563 1564 if self._match_text_seq("DELETE"): 1565 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1566 if self._match_text_seq("RECOMPRESS"): 1567 return self.expression( 1568 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1569 ) 1570 if self._match_text_seq("TO", "DISK"): 1571 return self.expression( 1572 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1573 ) 1574 if self._match_text_seq("TO", "VOLUME"): 1575 return self.expression( 1576 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1577 ) 1578 1579 return this 1580 1581 expressions = self._parse_csv(_parse_ttl_action) 1582 where = self._parse_where() 1583 group = self._parse_group() 1584 1585 aggregates = None 1586 if group and self._match(TokenType.SET): 1587 aggregates = self._parse_csv(self._parse_set_item) 1588 1589 return self.expression( 1590 exp.MergeTreeTTL, 1591 expressions=expressions, 1592 where=where, 1593 group=group, 1594 aggregates=aggregates, 1595 ) 1596 1597 def _parse_statement(self) -> t.Optional[exp.Expression]: 1598 if self._curr is None: 1599 return None 1600 1601 if self._match_set(self.STATEMENT_PARSERS): 1602 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1603 1604 if self._match_set(self.dialect.tokenizer.COMMANDS): 1605 return 
self._parse_command() 1606 1607 expression = self._parse_expression() 1608 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1609 return self._parse_query_modifiers(expression) 1610 1611 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1612 start = self._prev 1613 temporary = self._match(TokenType.TEMPORARY) 1614 materialized = self._match_text_seq("MATERIALIZED") 1615 1616 kind = self._match_set(self.CREATABLES) and self._prev.text 1617 if not kind: 1618 return self._parse_as_command(start) 1619 1620 if_exists = exists or self._parse_exists() 1621 table = self._parse_table_parts( 1622 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1623 ) 1624 1625 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1626 1627 if self._match(TokenType.L_PAREN, advance=False): 1628 expressions = self._parse_wrapped_csv(self._parse_types) 1629 else: 1630 expressions = None 1631 1632 return self.expression( 1633 exp.Drop, 1634 comments=start.comments, 1635 exists=if_exists, 1636 this=table, 1637 expressions=expressions, 1638 kind=kind.upper(), 1639 temporary=temporary, 1640 materialized=materialized, 1641 cascade=self._match_text_seq("CASCADE"), 1642 constraints=self._match_text_seq("CONSTRAINTS"), 1643 purge=self._match_text_seq("PURGE"), 1644 cluster=cluster, 1645 ) 1646 1647 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1648 return ( 1649 self._match_text_seq("IF") 1650 and (not not_ or self._match(TokenType.NOT)) 1651 and self._match(TokenType.EXISTS) 1652 ) 1653 1654 def _parse_create(self) -> exp.Create | exp.Command: 1655 # Note: this can't be None because we've matched a statement parser 1656 start = self._prev 1657 comments = self._prev_comments 1658 1659 replace = ( 1660 start.token_type == TokenType.REPLACE 1661 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1662 or self._match_pair(TokenType.OR, TokenType.ALTER) 1663 ) 1664 1665 unique = 
self._match(TokenType.UNIQUE) 1666 1667 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1668 self._advance() 1669 1670 properties = None 1671 create_token = self._match_set(self.CREATABLES) and self._prev 1672 1673 if not create_token: 1674 # exp.Properties.Location.POST_CREATE 1675 properties = self._parse_properties() 1676 create_token = self._match_set(self.CREATABLES) and self._prev 1677 1678 if not properties or not create_token: 1679 return self._parse_as_command(start) 1680 1681 exists = self._parse_exists(not_=True) 1682 this = None 1683 expression: t.Optional[exp.Expression] = None 1684 indexes = None 1685 no_schema_binding = None 1686 begin = None 1687 end = None 1688 clone = None 1689 1690 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1691 nonlocal properties 1692 if properties and temp_props: 1693 properties.expressions.extend(temp_props.expressions) 1694 elif temp_props: 1695 properties = temp_props 1696 1697 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1698 this = self._parse_user_defined_function(kind=create_token.token_type) 1699 1700 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1701 extend_props(self._parse_properties()) 1702 1703 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1704 extend_props(self._parse_properties()) 1705 1706 if not expression: 1707 if self._match(TokenType.COMMAND): 1708 expression = self._parse_as_command(self._prev) 1709 else: 1710 begin = self._match(TokenType.BEGIN) 1711 return_ = self._match_text_seq("RETURN") 1712 1713 if self._match(TokenType.STRING, advance=False): 1714 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1715 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1716 expression = self._parse_string() 1717 extend_props(self._parse_properties()) 1718 else: 1719 expression = 
self._parse_statement() 1720 1721 end = self._match_text_seq("END") 1722 1723 if return_: 1724 expression = self.expression(exp.Return, this=expression) 1725 elif create_token.token_type == TokenType.INDEX: 1726 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1727 if not self._match(TokenType.ON): 1728 index = self._parse_id_var() 1729 anonymous = False 1730 else: 1731 index = None 1732 anonymous = True 1733 1734 this = self._parse_index(index=index, anonymous=anonymous) 1735 elif create_token.token_type in self.DB_CREATABLES: 1736 table_parts = self._parse_table_parts( 1737 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1738 ) 1739 1740 # exp.Properties.Location.POST_NAME 1741 self._match(TokenType.COMMA) 1742 extend_props(self._parse_properties(before=True)) 1743 1744 this = self._parse_schema(this=table_parts) 1745 1746 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1747 extend_props(self._parse_properties()) 1748 1749 self._match(TokenType.ALIAS) 1750 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1751 # exp.Properties.Location.POST_ALIAS 1752 extend_props(self._parse_properties()) 1753 1754 if create_token.token_type == TokenType.SEQUENCE: 1755 expression = self._parse_types() 1756 extend_props(self._parse_properties()) 1757 else: 1758 expression = self._parse_ddl_select() 1759 1760 if create_token.token_type == TokenType.TABLE: 1761 # exp.Properties.Location.POST_EXPRESSION 1762 extend_props(self._parse_properties()) 1763 1764 indexes = [] 1765 while True: 1766 index = self._parse_index() 1767 1768 # exp.Properties.Location.POST_INDEX 1769 extend_props(self._parse_properties()) 1770 1771 if not index: 1772 break 1773 else: 1774 self._match(TokenType.COMMA) 1775 indexes.append(index) 1776 elif create_token.token_type == TokenType.VIEW: 1777 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1778 no_schema_binding = True 1779 1780 shallow = self._match_text_seq("SHALLOW") 1781 
1782 if self._match_texts(self.CLONE_KEYWORDS): 1783 copy = self._prev.text.lower() == "copy" 1784 clone = self.expression( 1785 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1786 ) 1787 1788 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1789 return self._parse_as_command(start) 1790 1791 return self.expression( 1792 exp.Create, 1793 comments=comments, 1794 this=this, 1795 kind=create_token.text.upper(), 1796 replace=replace, 1797 unique=unique, 1798 expression=expression, 1799 exists=exists, 1800 properties=properties, 1801 indexes=indexes, 1802 no_schema_binding=no_schema_binding, 1803 begin=begin, 1804 end=end, 1805 clone=clone, 1806 ) 1807 1808 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1809 seq = exp.SequenceProperties() 1810 1811 options = [] 1812 index = self._index 1813 1814 while self._curr: 1815 self._match(TokenType.COMMA) 1816 if self._match_text_seq("INCREMENT"): 1817 self._match_text_seq("BY") 1818 self._match_text_seq("=") 1819 seq.set("increment", self._parse_term()) 1820 elif self._match_text_seq("MINVALUE"): 1821 seq.set("minvalue", self._parse_term()) 1822 elif self._match_text_seq("MAXVALUE"): 1823 seq.set("maxvalue", self._parse_term()) 1824 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1825 self._match_text_seq("=") 1826 seq.set("start", self._parse_term()) 1827 elif self._match_text_seq("CACHE"): 1828 # T-SQL allows empty CACHE which is initialized dynamically 1829 seq.set("cache", self._parse_number() or True) 1830 elif self._match_text_seq("OWNED", "BY"): 1831 # "OWNED BY NONE" is the default 1832 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1833 else: 1834 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1835 if opt: 1836 options.append(opt) 1837 else: 1838 break 1839 1840 seq.set("options", options if options else None) 1841 return None if 
self._index == index else seq 1842 1843 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1844 # only used for teradata currently 1845 self._match(TokenType.COMMA) 1846 1847 kwargs = { 1848 "no": self._match_text_seq("NO"), 1849 "dual": self._match_text_seq("DUAL"), 1850 "before": self._match_text_seq("BEFORE"), 1851 "default": self._match_text_seq("DEFAULT"), 1852 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1853 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1854 "after": self._match_text_seq("AFTER"), 1855 "minimum": self._match_texts(("MIN", "MINIMUM")), 1856 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1857 } 1858 1859 if self._match_texts(self.PROPERTY_PARSERS): 1860 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1861 try: 1862 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1863 except TypeError: 1864 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1865 1866 return None 1867 1868 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1869 return self._parse_wrapped_csv(self._parse_property) 1870 1871 def _parse_property(self) -> t.Optional[exp.Expression]: 1872 if self._match_texts(self.PROPERTY_PARSERS): 1873 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1874 1875 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1876 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1877 1878 if self._match_text_seq("COMPOUND", "SORTKEY"): 1879 return self._parse_sortkey(compound=True) 1880 1881 if self._match_text_seq("SQL", "SECURITY"): 1882 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1883 1884 index = self._index 1885 key = self._parse_column() 1886 1887 if not self._match(TokenType.EQ): 1888 self._retreat(index) 1889 return self._parse_sequence_properties() 1890 1891 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1892 
if isinstance(key, exp.Column): 1893 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1894 1895 value = self._parse_bitwise() or self._parse_var(any_token=True) 1896 1897 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1898 if isinstance(value, exp.Column): 1899 value = exp.var(value.name) 1900 1901 return self.expression(exp.Property, this=key, value=value) 1902 1903 def _parse_stored(self) -> exp.FileFormatProperty: 1904 self._match(TokenType.ALIAS) 1905 1906 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1907 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1908 1909 return self.expression( 1910 exp.FileFormatProperty, 1911 this=( 1912 self.expression( 1913 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1914 ) 1915 if input_format or output_format 1916 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1917 ), 1918 ) 1919 1920 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1921 field = self._parse_field() 1922 if isinstance(field, exp.Identifier) and not field.quoted: 1923 field = exp.var(field) 1924 1925 return field 1926 1927 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1928 self._match(TokenType.EQ) 1929 self._match(TokenType.ALIAS) 1930 1931 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1932 1933 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1934 properties = [] 1935 while True: 1936 if before: 1937 prop = self._parse_property_before() 1938 else: 1939 prop = self._parse_property() 1940 if not prop: 1941 break 1942 for p in ensure_list(prop): 1943 properties.append(p) 1944 1945 if properties: 1946 return self.expression(exp.Properties, expressions=properties) 1947 1948 return None 1949 1950 def _parse_fallback(self, no: bool = False) -> 
exp.FallbackProperty: 1951 return self.expression( 1952 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1953 ) 1954 1955 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1956 if self._index >= 2: 1957 pre_volatile_token = self._tokens[self._index - 2] 1958 else: 1959 pre_volatile_token = None 1960 1961 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1962 return exp.VolatileProperty() 1963 1964 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1965 1966 def _parse_retention_period(self) -> exp.Var: 1967 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1968 number = self._parse_number() 1969 number_str = f"{number} " if number else "" 1970 unit = self._parse_var(any_token=True) 1971 return exp.var(f"{number_str}{unit}") 1972 1973 def _parse_system_versioning_property( 1974 self, with_: bool = False 1975 ) -> exp.WithSystemVersioningProperty: 1976 self._match(TokenType.EQ) 1977 prop = self.expression( 1978 exp.WithSystemVersioningProperty, 1979 **{ # type: ignore 1980 "on": True, 1981 "with": with_, 1982 }, 1983 ) 1984 1985 if self._match_text_seq("OFF"): 1986 prop.set("on", False) 1987 return prop 1988 1989 self._match(TokenType.ON) 1990 if self._match(TokenType.L_PAREN): 1991 while self._curr and not self._match(TokenType.R_PAREN): 1992 if self._match_text_seq("HISTORY_TABLE", "="): 1993 prop.set("this", self._parse_table_parts()) 1994 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1995 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1996 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1997 prop.set("retention_period", self._parse_retention_period()) 1998 1999 self._match(TokenType.COMMA) 2000 2001 return prop 2002 2003 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2004 self._match(TokenType.EQ) 2005 on = 
self._match_text_seq("ON") or not self._match_text_seq("OFF") 2006 prop = self.expression(exp.DataDeletionProperty, on=on) 2007 2008 if self._match(TokenType.L_PAREN): 2009 while self._curr and not self._match(TokenType.R_PAREN): 2010 if self._match_text_seq("FILTER_COLUMN", "="): 2011 prop.set("filter_column", self._parse_column()) 2012 elif self._match_text_seq("RETENTION_PERIOD", "="): 2013 prop.set("retention_period", self._parse_retention_period()) 2014 2015 self._match(TokenType.COMMA) 2016 2017 return prop 2018 2019 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2020 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2021 prop = self._parse_system_versioning_property(with_=True) 2022 self._match_r_paren() 2023 return prop 2024 2025 if self._match(TokenType.L_PAREN, advance=False): 2026 return self._parse_wrapped_properties() 2027 2028 if self._match_text_seq("JOURNAL"): 2029 return self._parse_withjournaltable() 2030 2031 if self._match_texts(self.VIEW_ATTRIBUTES): 2032 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2033 2034 if self._match_text_seq("DATA"): 2035 return self._parse_withdata(no=False) 2036 elif self._match_text_seq("NO", "DATA"): 2037 return self._parse_withdata(no=True) 2038 2039 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2040 return self._parse_serde_properties(with_=True) 2041 2042 if self._match(TokenType.SCHEMA): 2043 return self.expression( 2044 exp.WithSchemaBindingProperty, 2045 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2046 ) 2047 2048 if not self._next: 2049 return None 2050 2051 return self._parse_withisolatedloading() 2052 2053 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2054 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2055 self._match(TokenType.EQ) 2056 2057 user = self._parse_id_var() 2058 self._match(TokenType.PARAMETER) 2059 host = self._parse_id_var() or (self._match(TokenType.MOD) and 
self._prev.text) 2060 2061 if not user or not host: 2062 return None 2063 2064 return exp.DefinerProperty(this=f"{user}@{host}") 2065 2066 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2067 self._match(TokenType.TABLE) 2068 self._match(TokenType.EQ) 2069 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2070 2071 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2072 return self.expression(exp.LogProperty, no=no) 2073 2074 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2075 return self.expression(exp.JournalProperty, **kwargs) 2076 2077 def _parse_checksum(self) -> exp.ChecksumProperty: 2078 self._match(TokenType.EQ) 2079 2080 on = None 2081 if self._match(TokenType.ON): 2082 on = True 2083 elif self._match_text_seq("OFF"): 2084 on = False 2085 2086 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2087 2088 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2089 return self.expression( 2090 exp.Cluster, 2091 expressions=( 2092 self._parse_wrapped_csv(self._parse_ordered) 2093 if wrapped 2094 else self._parse_csv(self._parse_ordered) 2095 ), 2096 ) 2097 2098 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2099 self._match_text_seq("BY") 2100 2101 self._match_l_paren() 2102 expressions = self._parse_csv(self._parse_column) 2103 self._match_r_paren() 2104 2105 if self._match_text_seq("SORTED", "BY"): 2106 self._match_l_paren() 2107 sorted_by = self._parse_csv(self._parse_ordered) 2108 self._match_r_paren() 2109 else: 2110 sorted_by = None 2111 2112 self._match(TokenType.INTO) 2113 buckets = self._parse_number() 2114 self._match_text_seq("BUCKETS") 2115 2116 return self.expression( 2117 exp.ClusteredByProperty, 2118 expressions=expressions, 2119 sorted_by=sorted_by, 2120 buckets=buckets, 2121 ) 2122 2123 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2124 if not self._match_text_seq("GRANTS"): 2125 
self._retreat(self._index - 1) 2126 return None 2127 2128 return self.expression(exp.CopyGrantsProperty) 2129 2130 def _parse_freespace(self) -> exp.FreespaceProperty: 2131 self._match(TokenType.EQ) 2132 return self.expression( 2133 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2134 ) 2135 2136 def _parse_mergeblockratio( 2137 self, no: bool = False, default: bool = False 2138 ) -> exp.MergeBlockRatioProperty: 2139 if self._match(TokenType.EQ): 2140 return self.expression( 2141 exp.MergeBlockRatioProperty, 2142 this=self._parse_number(), 2143 percent=self._match(TokenType.PERCENT), 2144 ) 2145 2146 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2147 2148 def _parse_datablocksize( 2149 self, 2150 default: t.Optional[bool] = None, 2151 minimum: t.Optional[bool] = None, 2152 maximum: t.Optional[bool] = None, 2153 ) -> exp.DataBlocksizeProperty: 2154 self._match(TokenType.EQ) 2155 size = self._parse_number() 2156 2157 units = None 2158 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2159 units = self._prev.text 2160 2161 return self.expression( 2162 exp.DataBlocksizeProperty, 2163 size=size, 2164 units=units, 2165 default=default, 2166 minimum=minimum, 2167 maximum=maximum, 2168 ) 2169 2170 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2171 self._match(TokenType.EQ) 2172 always = self._match_text_seq("ALWAYS") 2173 manual = self._match_text_seq("MANUAL") 2174 never = self._match_text_seq("NEVER") 2175 default = self._match_text_seq("DEFAULT") 2176 2177 autotemp = None 2178 if self._match_text_seq("AUTOTEMP"): 2179 autotemp = self._parse_schema() 2180 2181 return self.expression( 2182 exp.BlockCompressionProperty, 2183 always=always, 2184 manual=manual, 2185 never=never, 2186 default=default, 2187 autotemp=autotemp, 2188 ) 2189 2190 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2191 index = self._index 2192 no = 
self._match_text_seq("NO") 2193 concurrent = self._match_text_seq("CONCURRENT") 2194 2195 if not self._match_text_seq("ISOLATED", "LOADING"): 2196 self._retreat(index) 2197 return None 2198 2199 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2200 return self.expression( 2201 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2202 ) 2203 2204 def _parse_locking(self) -> exp.LockingProperty: 2205 if self._match(TokenType.TABLE): 2206 kind = "TABLE" 2207 elif self._match(TokenType.VIEW): 2208 kind = "VIEW" 2209 elif self._match(TokenType.ROW): 2210 kind = "ROW" 2211 elif self._match_text_seq("DATABASE"): 2212 kind = "DATABASE" 2213 else: 2214 kind = None 2215 2216 if kind in ("DATABASE", "TABLE", "VIEW"): 2217 this = self._parse_table_parts() 2218 else: 2219 this = None 2220 2221 if self._match(TokenType.FOR): 2222 for_or_in = "FOR" 2223 elif self._match(TokenType.IN): 2224 for_or_in = "IN" 2225 else: 2226 for_or_in = None 2227 2228 if self._match_text_seq("ACCESS"): 2229 lock_type = "ACCESS" 2230 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2231 lock_type = "EXCLUSIVE" 2232 elif self._match_text_seq("SHARE"): 2233 lock_type = "SHARE" 2234 elif self._match_text_seq("READ"): 2235 lock_type = "READ" 2236 elif self._match_text_seq("WRITE"): 2237 lock_type = "WRITE" 2238 elif self._match_text_seq("CHECKSUM"): 2239 lock_type = "CHECKSUM" 2240 else: 2241 lock_type = None 2242 2243 override = self._match_text_seq("OVERRIDE") 2244 2245 return self.expression( 2246 exp.LockingProperty, 2247 this=this, 2248 kind=kind, 2249 for_or_in=for_or_in, 2250 lock_type=lock_type, 2251 override=override, 2252 ) 2253 2254 def _parse_partition_by(self) -> t.List[exp.Expression]: 2255 if self._match(TokenType.PARTITION_BY): 2256 return self._parse_csv(self._parse_assignment) 2257 return [] 2258 2259 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2260 def _parse_partition_bound_expr() -> 
t.Optional[exp.Expression]: 2261 if self._match_text_seq("MINVALUE"): 2262 return exp.var("MINVALUE") 2263 if self._match_text_seq("MAXVALUE"): 2264 return exp.var("MAXVALUE") 2265 return self._parse_bitwise() 2266 2267 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2268 expression = None 2269 from_expressions = None 2270 to_expressions = None 2271 2272 if self._match(TokenType.IN): 2273 this = self._parse_wrapped_csv(self._parse_bitwise) 2274 elif self._match(TokenType.FROM): 2275 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2276 self._match_text_seq("TO") 2277 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2278 elif self._match_text_seq("WITH", "(", "MODULUS"): 2279 this = self._parse_number() 2280 self._match_text_seq(",", "REMAINDER") 2281 expression = self._parse_number() 2282 self._match_r_paren() 2283 else: 2284 self.raise_error("Failed to parse partition bound spec.") 2285 2286 return self.expression( 2287 exp.PartitionBoundSpec, 2288 this=this, 2289 expression=expression, 2290 from_expressions=from_expressions, 2291 to_expressions=to_expressions, 2292 ) 2293 2294 # https://www.postgresql.org/docs/current/sql-createtable.html 2295 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2296 if not self._match_text_seq("OF"): 2297 self._retreat(self._index - 1) 2298 return None 2299 2300 this = self._parse_table(schema=True) 2301 2302 if self._match(TokenType.DEFAULT): 2303 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2304 elif self._match_text_seq("FOR", "VALUES"): 2305 expression = self._parse_partition_bound_spec() 2306 else: 2307 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2308 2309 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2310 2311 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2312 self._match(TokenType.EQ) 2313 return self.expression( 2314 exp.PartitionedByProperty, 
2315 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2316 ) 2317 2318 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2319 if self._match_text_seq("AND", "STATISTICS"): 2320 statistics = True 2321 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2322 statistics = False 2323 else: 2324 statistics = None 2325 2326 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2327 2328 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2329 if self._match_text_seq("SQL"): 2330 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2331 return None 2332 2333 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2334 if self._match_text_seq("SQL", "DATA"): 2335 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2336 return None 2337 2338 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2339 if self._match_text_seq("PRIMARY", "INDEX"): 2340 return exp.NoPrimaryIndexProperty() 2341 if self._match_text_seq("SQL"): 2342 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2343 return None 2344 2345 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2346 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2347 return exp.OnCommitProperty() 2348 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2349 return exp.OnCommitProperty(delete=True) 2350 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2351 2352 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2353 if self._match_text_seq("SQL", "DATA"): 2354 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2355 return None 2356 2357 def _parse_distkey(self) -> exp.DistKeyProperty: 2358 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2359 2360 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2361 table = 
self._parse_table(schema=True) 2362 2363 options = [] 2364 while self._match_texts(("INCLUDING", "EXCLUDING")): 2365 this = self._prev.text.upper() 2366 2367 id_var = self._parse_id_var() 2368 if not id_var: 2369 return None 2370 2371 options.append( 2372 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2373 ) 2374 2375 return self.expression(exp.LikeProperty, this=table, expressions=options) 2376 2377 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2378 return self.expression( 2379 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2380 ) 2381 2382 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2383 self._match(TokenType.EQ) 2384 return self.expression( 2385 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2386 ) 2387 2388 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2389 self._match_text_seq("WITH", "CONNECTION") 2390 return self.expression( 2391 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2392 ) 2393 2394 def _parse_returns(self) -> exp.ReturnsProperty: 2395 value: t.Optional[exp.Expression] 2396 null = None 2397 is_table = self._match(TokenType.TABLE) 2398 2399 if is_table: 2400 if self._match(TokenType.LT): 2401 value = self.expression( 2402 exp.Schema, 2403 this="TABLE", 2404 expressions=self._parse_csv(self._parse_struct_types), 2405 ) 2406 if not self._match(TokenType.GT): 2407 self.raise_error("Expecting >") 2408 else: 2409 value = self._parse_schema(exp.var("TABLE")) 2410 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2411 null = True 2412 value = None 2413 else: 2414 value = self._parse_types() 2415 2416 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2417 2418 def _parse_describe(self) -> exp.Describe: 2419 kind = self._match_set(self.CREATABLES) and self._prev.text 2420 style = 
self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2421 if self._match(TokenType.DOT): 2422 style = None 2423 self._retreat(self._index - 2) 2424 this = self._parse_table(schema=True) 2425 properties = self._parse_properties() 2426 expressions = properties.expressions if properties else None 2427 return self.expression( 2428 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2429 ) 2430 2431 def _parse_insert(self) -> exp.Insert: 2432 comments = ensure_list(self._prev_comments) 2433 hint = self._parse_hint() 2434 overwrite = self._match(TokenType.OVERWRITE) 2435 ignore = self._match(TokenType.IGNORE) 2436 local = self._match_text_seq("LOCAL") 2437 alternative = None 2438 is_function = None 2439 2440 if self._match_text_seq("DIRECTORY"): 2441 this: t.Optional[exp.Expression] = self.expression( 2442 exp.Directory, 2443 this=self._parse_var_or_string(), 2444 local=local, 2445 row_format=self._parse_row_format(match_row=True), 2446 ) 2447 else: 2448 if self._match(TokenType.OR): 2449 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2450 2451 self._match(TokenType.INTO) 2452 comments += ensure_list(self._prev_comments) 2453 self._match(TokenType.TABLE) 2454 is_function = self._match(TokenType.FUNCTION) 2455 2456 this = ( 2457 self._parse_table(schema=True, parse_partition=True) 2458 if not is_function 2459 else self._parse_function() 2460 ) 2461 2462 returning = self._parse_returning() 2463 2464 return self.expression( 2465 exp.Insert, 2466 comments=comments, 2467 hint=hint, 2468 is_function=is_function, 2469 this=this, 2470 stored=self._match_text_seq("STORED") and self._parse_stored(), 2471 by_name=self._match_text_seq("BY", "NAME"), 2472 exists=self._parse_exists(), 2473 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2474 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2475 conflict=self._parse_on_conflict(), 2476 
returning=returning or self._parse_returning(), 2477 overwrite=overwrite, 2478 alternative=alternative, 2479 ignore=ignore, 2480 ) 2481 2482 def _parse_kill(self) -> exp.Kill: 2483 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2484 2485 return self.expression( 2486 exp.Kill, 2487 this=self._parse_primary(), 2488 kind=kind, 2489 ) 2490 2491 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2492 conflict = self._match_text_seq("ON", "CONFLICT") 2493 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2494 2495 if not conflict and not duplicate: 2496 return None 2497 2498 conflict_keys = None 2499 constraint = None 2500 2501 if conflict: 2502 if self._match_text_seq("ON", "CONSTRAINT"): 2503 constraint = self._parse_id_var() 2504 elif self._match(TokenType.L_PAREN): 2505 conflict_keys = self._parse_csv(self._parse_id_var) 2506 self._match_r_paren() 2507 2508 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2509 if self._prev.token_type == TokenType.UPDATE: 2510 self._match(TokenType.SET) 2511 expressions = self._parse_csv(self._parse_equality) 2512 else: 2513 expressions = None 2514 2515 return self.expression( 2516 exp.OnConflict, 2517 duplicate=duplicate, 2518 expressions=expressions, 2519 action=action, 2520 conflict_keys=conflict_keys, 2521 constraint=constraint, 2522 ) 2523 2524 def _parse_returning(self) -> t.Optional[exp.Returning]: 2525 if not self._match(TokenType.RETURNING): 2526 return None 2527 return self.expression( 2528 exp.Returning, 2529 expressions=self._parse_csv(self._parse_expression), 2530 into=self._match(TokenType.INTO) and self._parse_table_part(), 2531 ) 2532 2533 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2534 if not self._match(TokenType.FORMAT): 2535 return None 2536 return self._parse_row_format() 2537 2538 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2539 index = 
self._index 2540 with_ = with_ or self._match_text_seq("WITH") 2541 2542 if not self._match(TokenType.SERDE_PROPERTIES): 2543 self._retreat(index) 2544 return None 2545 return self.expression( 2546 exp.SerdeProperties, 2547 **{ # type: ignore 2548 "expressions": self._parse_wrapped_properties(), 2549 "with": with_, 2550 }, 2551 ) 2552 2553 def _parse_row_format( 2554 self, match_row: bool = False 2555 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2556 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2557 return None 2558 2559 if self._match_text_seq("SERDE"): 2560 this = self._parse_string() 2561 2562 serde_properties = self._parse_serde_properties() 2563 2564 return self.expression( 2565 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2566 ) 2567 2568 self._match_text_seq("DELIMITED") 2569 2570 kwargs = {} 2571 2572 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2573 kwargs["fields"] = self._parse_string() 2574 if self._match_text_seq("ESCAPED", "BY"): 2575 kwargs["escaped"] = self._parse_string() 2576 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2577 kwargs["collection_items"] = self._parse_string() 2578 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2579 kwargs["map_keys"] = self._parse_string() 2580 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2581 kwargs["lines"] = self._parse_string() 2582 if self._match_text_seq("NULL", "DEFINED", "AS"): 2583 kwargs["null"] = self._parse_string() 2584 2585 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2586 2587 def _parse_load(self) -> exp.LoadData | exp.Command: 2588 if self._match_text_seq("DATA"): 2589 local = self._match_text_seq("LOCAL") 2590 self._match_text_seq("INPATH") 2591 inpath = self._parse_string() 2592 overwrite = self._match(TokenType.OVERWRITE) 2593 self._match_pair(TokenType.INTO, TokenType.TABLE) 2594 2595 return self.expression( 2596 
exp.LoadData, 2597 this=self._parse_table(schema=True), 2598 local=local, 2599 overwrite=overwrite, 2600 inpath=inpath, 2601 partition=self._parse_partition(), 2602 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2603 serde=self._match_text_seq("SERDE") and self._parse_string(), 2604 ) 2605 return self._parse_as_command(self._prev) 2606 2607 def _parse_delete(self) -> exp.Delete: 2608 # This handles MySQL's "Multiple-Table Syntax" 2609 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2610 tables = None 2611 comments = self._prev_comments 2612 if not self._match(TokenType.FROM, advance=False): 2613 tables = self._parse_csv(self._parse_table) or None 2614 2615 returning = self._parse_returning() 2616 2617 return self.expression( 2618 exp.Delete, 2619 comments=comments, 2620 tables=tables, 2621 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2622 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2623 where=self._parse_where(), 2624 returning=returning or self._parse_returning(), 2625 limit=self._parse_limit(), 2626 ) 2627 2628 def _parse_update(self) -> exp.Update: 2629 comments = self._prev_comments 2630 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2631 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2632 returning = self._parse_returning() 2633 return self.expression( 2634 exp.Update, 2635 comments=comments, 2636 **{ # type: ignore 2637 "this": this, 2638 "expressions": expressions, 2639 "from": self._parse_from(joins=True), 2640 "where": self._parse_where(), 2641 "returning": returning or self._parse_returning(), 2642 "order": self._parse_order(), 2643 "limit": self._parse_limit(), 2644 }, 2645 ) 2646 2647 def _parse_uncache(self) -> exp.Uncache: 2648 if not self._match(TokenType.TABLE): 2649 self.raise_error("Expecting TABLE after UNCACHE") 2650 2651 return self.expression( 2652 exp.Uncache, exists=self._parse_exists(), 
this=self._parse_table(schema=True) 2653 ) 2654 2655 def _parse_cache(self) -> exp.Cache: 2656 lazy = self._match_text_seq("LAZY") 2657 self._match(TokenType.TABLE) 2658 table = self._parse_table(schema=True) 2659 2660 options = [] 2661 if self._match_text_seq("OPTIONS"): 2662 self._match_l_paren() 2663 k = self._parse_string() 2664 self._match(TokenType.EQ) 2665 v = self._parse_string() 2666 options = [k, v] 2667 self._match_r_paren() 2668 2669 self._match(TokenType.ALIAS) 2670 return self.expression( 2671 exp.Cache, 2672 this=table, 2673 lazy=lazy, 2674 options=options, 2675 expression=self._parse_select(nested=True), 2676 ) 2677 2678 def _parse_partition(self) -> t.Optional[exp.Partition]: 2679 if not self._match(TokenType.PARTITION): 2680 return None 2681 2682 return self.expression( 2683 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2684 ) 2685 2686 def _parse_value(self) -> t.Optional[exp.Tuple]: 2687 if self._match(TokenType.L_PAREN): 2688 expressions = self._parse_csv(self._parse_expression) 2689 self._match_r_paren() 2690 return self.expression(exp.Tuple, expressions=expressions) 2691 2692 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
2693 expression = self._parse_expression() 2694 if expression: 2695 return self.expression(exp.Tuple, expressions=[expression]) 2696 return None 2697 2698 def _parse_projections(self) -> t.List[exp.Expression]: 2699 return self._parse_expressions() 2700 2701 def _parse_select( 2702 self, 2703 nested: bool = False, 2704 table: bool = False, 2705 parse_subquery_alias: bool = True, 2706 parse_set_operation: bool = True, 2707 ) -> t.Optional[exp.Expression]: 2708 cte = self._parse_with() 2709 2710 if cte: 2711 this = self._parse_statement() 2712 2713 if not this: 2714 self.raise_error("Failed to parse any statement following CTE") 2715 return cte 2716 2717 if "with" in this.arg_types: 2718 this.set("with", cte) 2719 else: 2720 self.raise_error(f"{this.key} does not support CTE") 2721 this = cte 2722 2723 return this 2724 2725 # duckdb supports leading with FROM x 2726 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2727 2728 if self._match(TokenType.SELECT): 2729 comments = self._prev_comments 2730 2731 hint = self._parse_hint() 2732 all_ = self._match(TokenType.ALL) 2733 distinct = self._match_set(self.DISTINCT_TOKENS) 2734 2735 kind = ( 2736 self._match(TokenType.ALIAS) 2737 and self._match_texts(("STRUCT", "VALUE")) 2738 and self._prev.text.upper() 2739 ) 2740 2741 if distinct: 2742 distinct = self.expression( 2743 exp.Distinct, 2744 on=self._parse_value() if self._match(TokenType.ON) else None, 2745 ) 2746 2747 if all_ and distinct: 2748 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2749 2750 limit = self._parse_limit(top=True) 2751 projections = self._parse_projections() 2752 2753 this = self.expression( 2754 exp.Select, 2755 kind=kind, 2756 hint=hint, 2757 distinct=distinct, 2758 expressions=projections, 2759 limit=limit, 2760 ) 2761 this.comments = comments 2762 2763 into = self._parse_into() 2764 if into: 2765 this.set("into", into) 2766 2767 if not from_: 2768 from_ = self._parse_from() 2769 2770 if 
from_: 2771 this.set("from", from_) 2772 2773 this = self._parse_query_modifiers(this) 2774 elif (table or nested) and self._match(TokenType.L_PAREN): 2775 if self._match(TokenType.PIVOT): 2776 this = self._parse_simplified_pivot() 2777 elif self._match(TokenType.FROM): 2778 this = exp.select("*").from_( 2779 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2780 ) 2781 else: 2782 this = ( 2783 self._parse_table() 2784 if table 2785 else self._parse_select(nested=True, parse_set_operation=False) 2786 ) 2787 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2788 2789 self._match_r_paren() 2790 2791 # We return early here so that the UNION isn't attached to the subquery by the 2792 # following call to _parse_set_operations, but instead becomes the parent node 2793 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2794 elif self._match(TokenType.VALUES, advance=False): 2795 this = self._parse_derived_table_values() 2796 elif from_: 2797 this = exp.select("*").from_(from_.this, copy=False) 2798 else: 2799 this = None 2800 2801 if parse_set_operation: 2802 return self._parse_set_operations(this) 2803 return this 2804 2805 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2806 if not skip_with_token and not self._match(TokenType.WITH): 2807 return None 2808 2809 comments = self._prev_comments 2810 recursive = self._match(TokenType.RECURSIVE) 2811 2812 expressions = [] 2813 while True: 2814 expressions.append(self._parse_cte()) 2815 2816 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2817 break 2818 else: 2819 self._match(TokenType.WITH) 2820 2821 return self.expression( 2822 exp.With, comments=comments, expressions=expressions, recursive=recursive 2823 ) 2824 2825 def _parse_cte(self) -> exp.CTE: 2826 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2827 if not alias or not alias.this: 2828 self.raise_error("Expected CTE to have alias") 2829 2830 
self._match(TokenType.ALIAS) 2831 2832 if self._match_text_seq("NOT", "MATERIALIZED"): 2833 materialized = False 2834 elif self._match_text_seq("MATERIALIZED"): 2835 materialized = True 2836 else: 2837 materialized = None 2838 2839 return self.expression( 2840 exp.CTE, 2841 this=self._parse_wrapped(self._parse_statement), 2842 alias=alias, 2843 materialized=materialized, 2844 ) 2845 2846 def _parse_table_alias( 2847 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2848 ) -> t.Optional[exp.TableAlias]: 2849 any_token = self._match(TokenType.ALIAS) 2850 alias = ( 2851 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2852 or self._parse_string_as_identifier() 2853 ) 2854 2855 index = self._index 2856 if self._match(TokenType.L_PAREN): 2857 columns = self._parse_csv(self._parse_function_parameter) 2858 self._match_r_paren() if columns else self._retreat(index) 2859 else: 2860 columns = None 2861 2862 if not alias and not columns: 2863 return None 2864 2865 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2866 2867 # We bubble up comments from the Identifier to the TableAlias 2868 if isinstance(alias, exp.Identifier): 2869 table_alias.add_comments(alias.pop_comments()) 2870 2871 return table_alias 2872 2873 def _parse_subquery( 2874 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2875 ) -> t.Optional[exp.Subquery]: 2876 if not this: 2877 return None 2878 2879 return self.expression( 2880 exp.Subquery, 2881 this=this, 2882 pivots=self._parse_pivots(), 2883 alias=self._parse_table_alias() if parse_alias else None, 2884 ) 2885 2886 def _implicit_unnests_to_explicit(self, this: E) -> E: 2887 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2888 2889 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2890 for i, join in enumerate(this.args.get("joins") or []): 2891 table = join.this 2892 normalized_table = 
table.copy() 2893 normalized_table.meta["maybe_column"] = True 2894 normalized_table = _norm(normalized_table, dialect=self.dialect) 2895 2896 if isinstance(table, exp.Table) and not join.args.get("on"): 2897 if normalized_table.parts[0].name in refs: 2898 table_as_column = table.to_column() 2899 unnest = exp.Unnest(expressions=[table_as_column]) 2900 2901 # Table.to_column creates a parent Alias node that we want to convert to 2902 # a TableAlias and attach to the Unnest, so it matches the parser's output 2903 if isinstance(table.args.get("alias"), exp.TableAlias): 2904 table_as_column.replace(table_as_column.this) 2905 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2906 2907 table.replace(unnest) 2908 2909 refs.add(normalized_table.alias_or_name) 2910 2911 return this 2912 2913 def _parse_query_modifiers( 2914 self, this: t.Optional[exp.Expression] 2915 ) -> t.Optional[exp.Expression]: 2916 if isinstance(this, (exp.Query, exp.Table)): 2917 for join in self._parse_joins(): 2918 this.append("joins", join) 2919 for lateral in iter(self._parse_lateral, None): 2920 this.append("laterals", lateral) 2921 2922 while True: 2923 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2924 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2925 key, expression = parser(self) 2926 2927 if expression: 2928 this.set(key, expression) 2929 if key == "limit": 2930 offset = expression.args.pop("offset", None) 2931 2932 if offset: 2933 offset = exp.Offset(expression=offset) 2934 this.set("offset", offset) 2935 2936 limit_by_expressions = expression.expressions 2937 expression.set("expressions", None) 2938 offset.set("expressions", limit_by_expressions) 2939 continue 2940 break 2941 2942 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2943 this = self._implicit_unnests_to_explicit(this) 2944 2945 return this 2946 2947 def _parse_hint(self) -> t.Optional[exp.Hint]: 2948 if self._match(TokenType.HINT): 2949 hints = [] 2950 
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a MATCH_RECOGNIZE clause into an `exp.MatchRecognize` node.

    Returns None when the current token is not MATCH_RECOGNIZE. The sub-clauses
    are read in a fixed order: PARTITION BY, ORDER BY, MEASURES, the rows-per-match
    option, AFTER MATCH SKIP, PATTERN and DEFINE, followed by an optional table
    alias. Each sub-clause that is absent is stored as None.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    # The rows-per-match option is kept verbatim as a Var rather than modelled structurally
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    # Same approach for the skip strategy: rebuild the keyword text and wrap it in a Var
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST/LAST is taken as-is, whatever its type
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern is not parsed: tokens are skipped while tracking paren depth
        # (starting at 1 for the paren consumed above) until the matching ")" is passed.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` is captured before advancing, so once the loop exits on the
            # closing paren it refers to the token just before that paren.
            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        # NOTE(review): _find_sql presumably returns the raw SQL text between the
        # two tokens - confirm against its definition.
        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_join(
    self, skip_join_token: bool = False, parse_bracket: bool = False
) -> t.Optional[exp.Join]:
    """Parse a single join and return it as an `exp.Join`, or None if there is none.

    Args:
        skip_join_token: When True, a join is parsed even if no JOIN keyword
            (or STRAIGHT_JOIN kind) was matched.
        parse_bracket: Forwarded to `_parse_table` for the joined table.
    """
    # A bare comma is treated as a join that has no method, side, kind or condition
    if self._match(TokenType.COMMA):
        return self.expression(exp.Join, this=self._parse_table())

    index = self._index
    method, side, kind = self._parse_join_parts()
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

    # The method/side/kind tokens were consumed speculatively; undo that if no join follows
    if not skip_join_token and not join:
        self._retreat(index)
        kind = None
        method = None
        side = None

    # NOTE(review): the third positional argument is presumably advance=False, i.e.
    # OUTER/CROSS APPLY is only peeked at here and left for _parse_table - confirm.
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

    if method:
        kwargs["method"] = method.text
    if side:
        kwargs["side"] = side.text
    if kind:
        kwargs["kind"] = kind.text
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_assignment()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_wrapped_id_vars()
    elif not isinstance(kwargs["this"], exp.Unnest) and not (
        kind and kind.token_type == TokenType.CROSS
    ):
        # No condition directly after the table: try to parse joins nested under the
        # joined table. They are kept only if an ON/USING for *this* join follows;
        # otherwise the parser rewinds and the nested joins are discarded.
        index = self._index
        joins: t.Optional[list] = list(self._parse_joins())

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        else:
            joins = None
            self._retreat(index)

        kwargs["this"].set("joins", joins if joins else None)

    # Comments attached to the method/side/kind tokens are carried over to the Join node
    comments = [c for token in (method, side, kind) if token for c in token.comments]
    return self.expression(exp.Join, comments=comments, **kwargs)
def _parse_index_params(self) -> exp.IndexParameters:
    """Parse the optional clauses of an index definition.

    The clauses are read in this order: USING <var>, a parenthesized column
    list, INCLUDE (...), PARTITION BY, WITH (...), USING INDEX TABLESPACE <var>,
    WHERE and ON <field>. Clauses that are absent are stored as None (or as the
    falsy result of the failed match, for `with_storage`).
    """
    using = None
    if self._match(TokenType.USING):
        using = self._parse_var(any_token=True)

    # The paren is only peeked at; the wrapped parser consumes it itself
    columns = None
    if self._match(TokenType.L_PAREN, advance=False):
        columns = self._parse_wrapped_csv(self._parse_with_operator)

    include = None
    if self._match_text_seq("INCLUDE"):
        include = self._parse_wrapped_id_vars()

    partition_by = self._parse_partition_by()

    with_storage = self._match(TokenType.WITH)
    if with_storage:
        with_storage = self._parse_wrapped_properties()

    tablespace = None
    if self._match_text_seq("USING", "INDEX", "TABLESPACE"):
        tablespace = self._parse_var(any_token=True)

    where = self._parse_where()

    on = None
    if self._match(TokenType.ON):
        on = self._parse_field()

    return self.expression(
        exp.IndexParameters,
        using=using,
        columns=columns,
        include=include,
        partition_by=partition_by,
        where=where,
        with_storage=with_storage,
        tablespace=tablespace,
        on=on,
    )
def _parse_table_parts(
    self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
) -> exp.Table:
    """Parse a dotted table reference into an `exp.Table` with catalog/db/table parts.

    Args:
        schema: Forwarded to `_parse_table_part` for every part.
        is_db_reference: When True the parsed parts name a database, so they are
            shifted one level up (table -> db, db -> catalog) and `this` is None.
        wildcard: When True, a `*` following the last part is folded into the
            table identifier.

    Raises an error through `raise_error` when the required table (or database,
    for `is_db_reference`) name is missing.
    """
    catalog = None
    db = None
    table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

    # Each additional ".part" shifts the previously parsed names one level up
    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
            )
        else:
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        # Either extend the existing identifier's name with "*" or, if there is
        # no table part at all, use "*" as the identifier.
        if isinstance(table, exp.Identifier):
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    # We bubble up comments from the Identifier to the Table
    comments = table.pop_comments() if isinstance(table, exp.Expression) else None

    if is_db_reference:
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    return self.expression(
        exp.Table,
        comments=comments,
        this=table,
        db=db,
        catalog=catalog,
        pivots=self._parse_pivots(),
    )
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a table snapshot specifier into an `exp.Version` node.

    Returns None when neither a TIMESTAMP_SNAPSHOT nor a VERSION_SNAPSHOT token
    is matched. The resulting node's `kind` is the upper-cased FROM/BETWEEN
    keyword, "CONTAINED IN", "ALL" or "AS OF".
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        snapshot_type = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        snapshot_type = "VERSION"
    else:
        return None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower_bound = self._parse_bitwise()
        # The separator between the two bounds (TO / AND) is consumed if present
        self._match_texts(("TO", "AND"))
        upper_bound = self._parse_bitwise()
        bounds = [lower_bound, upper_bound]
        payload: t.Optional[exp.Expression] = self.expression(exp.Tuple, expressions=bounds)
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        members = self._parse_wrapped_csv(self._parse_bitwise)
        payload = self.expression(exp.Tuple, expressions=members)
    elif self._match(TokenType.ALL):
        kind = "ALL"
        payload = None
    else:
        # AS OF is the fallback; the keywords themselves are optional here
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        payload = self._parse_type()

    return self.expression(exp.Version, this=snapshot_type, expression=payload, kind=kind)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an `exp.TableSample` node.

    Args:
        as_modifier: When True, `USING SAMPLE` is accepted as an alternative
            introducer to the TABLE_SAMPLE token.

    Returns:
        None when no sampling clause starts at the current position, otherwise
        a TableSample whose unset parts are None.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    size = None
    seed = None

    method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary() or self._parse_placeholder()
        )

    if self._match_text_seq("BUCKET"):
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
        # An explicit ROWS keyword, or a dialect where a bare number is not a
        # percentage, means the number is a row count.
        size = num
    else:
        percent = num

    if matched_l_paren:
        self._match_r_paren()

    # A second parenthesized group overrides the method and may carry a seed
    # after a comma; otherwise SEED/REPEATABLE introduces a wrapped seed number.
    if self._match(TokenType.L_PAREN):
        method = self._parse_var(upper=True)
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    if not method and self.DEFAULT_SAMPLING_METHOD:
        method = exp.var(self.DEFAULT_SAMPLING_METHOD)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        size=size,
        seed=seed,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a PIVOT or UNPIVOT clause into an `exp.Pivot` node.

    Returns None when neither keyword is present, or (after rewinding) when the
    keyword is not followed by an opening paren. For PIVOT, the output column
    names are precomputed and stored under "columns".
    """
    index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        unpivot = False
    elif self._match(TokenType.UNPIVOT):
        unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    expressions = []

    # Without "(" this is not a pivot clause after all: give the keyword tokens back
    if not self._match(TokenType.L_PAREN):
        self._retreat(index)
        return None

    # UNPIVOT lists plain columns, PIVOT lists (optionally aliased) function calls
    if unpivot:
        expressions = self._parse_csv(self._parse_column)
    else:
        expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    field = self._parse_pivot_in()

    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        field=field,
        unpivot=unpivot,
        include_nulls=include_nulls,
    )

    # An alias is only looked for when no further PIVOT/UNPIVOT follows immediately
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if not unpivot:
        names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

        # One output column per (IN-list value, aggregation name) pair; the
        # PREFIXED_PIVOT_COLUMNS flag decides which of the two comes first.
        columns: t.List[exp.Expression] = []
        for fld in pivot.args["field"].expressions:
            field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
            for name in names:
                if self.PREFIXED_PIVOT_COLUMNS:
                    name = f"{name}_{field_name}" if name else field_name
                else:
                    name = f"{field_name}_{name}" if name else field_name

                columns.append(exp.to_identifier(name))

        pivot.set("columns", columns)

    return pivot
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse a hierarchical-query clause (START WITH / CONNECT BY) into `exp.Connect`.

    Args:
        skip_start_token: When True, no leading START WITH is looked for and
            parsing begins at CONNECT BY.

    Returns:
        None when `skip_start_token` is False and START WITH is not matched.
        Otherwise a Connect node; START WITH may also appear after the
        CONNECT BY condition.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_assignment()
    else:
        return None

    self._match(TokenType.CONNECT_BY)
    nocycle = self._match_text_seq("NOCYCLE")

    # PRIOR is registered as a no-paren function only while the CONNECT BY
    # condition is being parsed. The registry is mutated in place, so the
    # previous state is restored in a `finally`: a parse error must not leave
    # the temporary entry behind, and a nested CONNECT BY must not remove the
    # entry its enclosing condition still relies on.
    parsers = self.NO_PAREN_FUNCTION_PARSERS
    missing = object()
    previous = parsers.get("PRIOR", missing)
    parsers["PRIOR"] = lambda self: self.expression(
        exp.Prior, this=self._parse_bitwise()
    )
    try:
        connect = self._parse_assignment()
    finally:
        if previous is missing:
            parsers.pop("PRIOR", None)
        else:
            parsers["PRIOR"] = previous

    if not start and self._match(TokenType.START_WITH):
        start = self._parse_assignment()

    return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
def _parse_limit(
    self,
    this: t.Optional[exp.Expression] = None,
    top: bool = False,
    skip_limit_token: bool = False,
) -> t.Optional[exp.Expression]:
    """Parse a LIMIT / TOP clause or a FETCH clause.

    Args:
        this: Expression the limit applies to; returned unchanged when no
            limiting clause is found.
        top: When True, the TOP token is expected instead of LIMIT.
        skip_limit_token: When True, the introducing token is assumed to have
            been consumed already.

    Returns:
        An `exp.Limit`, an `exp.Fetch`, or `this` if neither clause is present.
    """
    if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
        comments = self._prev_comments
        if top:
            # TOP accepts either a parenthesized term or a bare number
            limit_paren = self._match(TokenType.L_PAREN)
            expression = self._parse_term() if limit_paren else self._parse_number()

            if limit_paren:
                self._match_r_paren()
        else:
            expression = self._parse_term()

        # Two comma-separated terms: the first one is the offset, the second the row count
        if self._match(TokenType.COMMA):
            offset = expression
            expression = self._parse_term()
        else:
            offset = None

        limit_exp = self.expression(
            exp.Limit,
            this=this,
            expression=expression,
            offset=offset,
            comments=comments,
            expressions=self._parse_limit_by(),
        )

        return limit_exp

    if self._match(TokenType.FETCH):
        # The direction keyword is optional and defaults to FIRST
        direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
        direction = self._prev.text.upper() if direction else "FIRST"

        count = self._parse_field(tokens=self.FETCH_TOKENS)
        percent = self._match(TokenType.PERCENT)

        self._match_set((TokenType.ROW, TokenType.ROWS))

        # ONLY is consumed for validation only; it is not stored on the Fetch node
        only = self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if only and with_ties:
            self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

        return self.expression(
            exp.Fetch,
            direction=direction,
            count=count,
            percent=percent,
            with_ties=with_ties,
        )

    return this
self._prev.token_type 4012 4013 if token_type == TokenType.UNION: 4014 operation: t.Type[exp.SetOperation] = exp.Union 4015 elif token_type == TokenType.EXCEPT: 4016 operation = exp.Except 4017 else: 4018 operation = exp.Intersect 4019 4020 comments = self._prev.comments 4021 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4022 by_name = self._match_text_seq("BY", "NAME") 4023 expression = self._parse_select(nested=True, parse_set_operation=False) 4024 4025 this = self.expression( 4026 operation, 4027 comments=comments, 4028 this=this, 4029 distinct=distinct, 4030 by_name=by_name, 4031 expression=expression, 4032 ) 4033 4034 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4035 expression = this.expression 4036 4037 if expression: 4038 for arg in self.SET_OP_MODIFIERS: 4039 expr = expression.args.get(arg) 4040 if expr: 4041 this.set(arg, expr.pop()) 4042 4043 return this 4044 4045 def _parse_expression(self) -> t.Optional[exp.Expression]: 4046 return self._parse_alias(self._parse_assignment()) 4047 4048 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4049 this = self._parse_disjunction() 4050 4051 while self._match_set(self.ASSIGNMENT): 4052 this = self.expression( 4053 self.ASSIGNMENT[self._prev.token_type], 4054 this=this, 4055 comments=self._prev_comments, 4056 expression=self._parse_assignment(), 4057 ) 4058 4059 return this 4060 4061 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4062 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4063 4064 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4065 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4066 4067 def _parse_equality(self) -> t.Optional[exp.Expression]: 4068 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4069 4070 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4071 return self._parse_tokens(self._parse_range, self.COMPARISON) 4072 4073 def 
_parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4074 this = this or self._parse_bitwise() 4075 negate = self._match(TokenType.NOT) 4076 4077 if self._match_set(self.RANGE_PARSERS): 4078 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4079 if not expression: 4080 return this 4081 4082 this = expression 4083 elif self._match(TokenType.ISNULL): 4084 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4085 4086 # Postgres supports ISNULL and NOTNULL for conditions. 4087 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4088 if self._match(TokenType.NOTNULL): 4089 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4090 this = self.expression(exp.Not, this=this) 4091 4092 if negate: 4093 this = self.expression(exp.Not, this=this) 4094 4095 if self._match(TokenType.IS): 4096 this = self._parse_is(this) 4097 4098 return this 4099 4100 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4101 index = self._index - 1 4102 negate = self._match(TokenType.NOT) 4103 4104 if self._match_text_seq("DISTINCT", "FROM"): 4105 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4106 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4107 4108 expression = self._parse_null() or self._parse_boolean() 4109 if not expression: 4110 self._retreat(index) 4111 return None 4112 4113 this = self.expression(exp.Is, this=this, expression=expression) 4114 return self.expression(exp.Not, this=this) if negate else this 4115 4116 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4117 unnest = self._parse_unnest(with_alias=False) 4118 if unnest: 4119 this = self.expression(exp.In, this=this, unnest=unnest) 4120 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4121 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4122 expressions = self._parse_csv(lambda: 
self._parse_select_or_expression(alias=alias)) 4123 4124 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4125 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4126 else: 4127 this = self.expression(exp.In, this=this, expressions=expressions) 4128 4129 if matched_l_paren: 4130 self._match_r_paren(this) 4131 elif not self._match(TokenType.R_BRACKET, expression=this): 4132 self.raise_error("Expecting ]") 4133 else: 4134 this = self.expression(exp.In, this=this, field=self._parse_field()) 4135 4136 return this 4137 4138 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4139 low = self._parse_bitwise() 4140 self._match(TokenType.AND) 4141 high = self._parse_bitwise() 4142 return self.expression(exp.Between, this=this, low=low, high=high) 4143 4144 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4145 if not self._match(TokenType.ESCAPE): 4146 return this 4147 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4148 4149 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4150 index = self._index 4151 4152 if not self._match(TokenType.INTERVAL) and match_interval: 4153 return None 4154 4155 if self._match(TokenType.STRING, advance=False): 4156 this = self._parse_primary() 4157 else: 4158 this = self._parse_term() 4159 4160 if not this or ( 4161 isinstance(this, exp.Column) 4162 and not this.table 4163 and not this.this.quoted 4164 and this.name.upper() == "IS" 4165 ): 4166 self._retreat(index) 4167 return None 4168 4169 unit = self._parse_function() or ( 4170 not self._match(TokenType.ALIAS, advance=False) 4171 and self._parse_var(any_token=True, upper=True) 4172 ) 4173 4174 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4175 # each INTERVAL expression into this canonical form so it's easy to transpile 4176 if this and this.is_number: 4177 this = 
exp.Literal.string(this.to_py()) 4178 elif this and this.is_string: 4179 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4180 if len(parts) == 1: 4181 if unit: 4182 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4183 self._retreat(self._index - 1) 4184 4185 this = exp.Literal.string(parts[0][0]) 4186 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4187 4188 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4189 unit = self.expression( 4190 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4191 ) 4192 4193 interval = self.expression(exp.Interval, this=this, unit=unit) 4194 4195 index = self._index 4196 self._match(TokenType.PLUS) 4197 4198 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4199 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4200 return self.expression( 4201 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4202 ) 4203 4204 self._retreat(index) 4205 return interval 4206 4207 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4208 this = self._parse_term() 4209 4210 while True: 4211 if self._match_set(self.BITWISE): 4212 this = self.expression( 4213 self.BITWISE[self._prev.token_type], 4214 this=this, 4215 expression=self._parse_term(), 4216 ) 4217 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4218 this = self.expression( 4219 exp.DPipe, 4220 this=this, 4221 expression=self._parse_term(), 4222 safe=not self.dialect.STRICT_STRING_CONCAT, 4223 ) 4224 elif self._match(TokenType.DQMARK): 4225 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4226 elif self._match_pair(TokenType.LT, TokenType.LT): 4227 this = self.expression( 4228 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4229 ) 4230 elif self._match_pair(TokenType.GT, TokenType.GT): 4231 this = self.expression( 4232 exp.BitwiseRightShift, this=this, 
expression=self._parse_term() 4233 ) 4234 else: 4235 break 4236 4237 return this 4238 4239 def _parse_term(self) -> t.Optional[exp.Expression]: 4240 return self._parse_tokens(self._parse_factor, self.TERM) 4241 4242 def _parse_factor(self) -> t.Optional[exp.Expression]: 4243 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4244 this = parse_method() 4245 4246 while self._match_set(self.FACTOR): 4247 klass = self.FACTOR[self._prev.token_type] 4248 comments = self._prev_comments 4249 expression = parse_method() 4250 4251 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4252 self._retreat(self._index - 1) 4253 return this 4254 4255 this = self.expression(klass, this=this, comments=comments, expression=expression) 4256 4257 if isinstance(this, exp.Div): 4258 this.args["typed"] = self.dialect.TYPED_DIVISION 4259 this.args["safe"] = self.dialect.SAFE_DIVISION 4260 4261 return this 4262 4263 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4264 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4265 4266 def _parse_unary(self) -> t.Optional[exp.Expression]: 4267 if self._match_set(self.UNARY_PARSERS): 4268 return self.UNARY_PARSERS[self._prev.token_type](self) 4269 return self._parse_at_time_zone(self._parse_type()) 4270 4271 def _parse_type( 4272 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4273 ) -> t.Optional[exp.Expression]: 4274 interval = parse_interval and self._parse_interval() 4275 if interval: 4276 return interval 4277 4278 index = self._index 4279 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4280 4281 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
4282 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4283 if isinstance(data_type, exp.Cast): 4284 # This constructor can contain ops directly after it, for instance struct unnesting: 4285 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 4286 return self._parse_column_ops(data_type) 4287 4288 if data_type: 4289 index2 = self._index 4290 this = self._parse_primary() 4291 4292 if isinstance(this, exp.Literal): 4293 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4294 if parser: 4295 return parser(self, this, data_type) 4296 4297 return self.expression(exp.Cast, this=this, to=data_type) 4298 4299 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4300 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4301 # 4302 # If the index difference here is greater than 1, that means the parser itself must have 4303 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4304 # 4305 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4306 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4307 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4308 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 4309 # 4310 # In these cases, we don't really want to return the converted type, but instead retreat 4311 # and try to parse a Column or Identifier in the section below. 
4312 if data_type.expressions and index2 - index > 1: 4313 self._retreat(index2) 4314 return self._parse_column_ops(data_type) 4315 4316 self._retreat(index) 4317 4318 if fallback_to_identifier: 4319 return self._parse_id_var() 4320 4321 this = self._parse_column() 4322 return this and self._parse_column_ops(this) 4323 4324 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4325 this = self._parse_type() 4326 if not this: 4327 return None 4328 4329 if isinstance(this, exp.Column) and not this.table: 4330 this = exp.var(this.name.upper()) 4331 4332 return self.expression( 4333 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4334 ) 4335 4336 def _parse_types( 4337 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4338 ) -> t.Optional[exp.Expression]: 4339 index = self._index 4340 4341 this: t.Optional[exp.Expression] = None 4342 prefix = self._match_text_seq("SYSUDTLIB", ".") 4343 4344 if not self._match_set(self.TYPE_TOKENS): 4345 identifier = allow_identifiers and self._parse_id_var( 4346 any_token=False, tokens=(TokenType.VAR,) 4347 ) 4348 if isinstance(identifier, exp.Identifier): 4349 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4350 4351 if len(tokens) != 1: 4352 self.raise_error("Unexpected identifier", self._prev) 4353 4354 if tokens[0].token_type in self.TYPE_TOKENS: 4355 self._prev = tokens[0] 4356 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4357 type_name = identifier.name 4358 4359 while self._match(TokenType.DOT): 4360 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4361 4362 this = exp.DataType.build(type_name, udt=True) 4363 else: 4364 self._retreat(self._index - 1) 4365 return None 4366 else: 4367 return None 4368 4369 type_token = self._prev.token_type 4370 4371 if type_token == TokenType.PSEUDO_TYPE: 4372 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4373 4374 if type_token == TokenType.OBJECT_IDENTIFIER: 4375 
return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4376 4377 # https://materialize.com/docs/sql/types/map/ 4378 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4379 key_type = self._parse_types( 4380 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4381 ) 4382 if not self._match(TokenType.FARROW): 4383 self._retreat(index) 4384 return None 4385 4386 value_type = self._parse_types( 4387 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4388 ) 4389 if not self._match(TokenType.R_BRACKET): 4390 self._retreat(index) 4391 return None 4392 4393 return exp.DataType( 4394 this=exp.DataType.Type.MAP, 4395 expressions=[key_type, value_type], 4396 nested=True, 4397 prefix=prefix, 4398 ) 4399 4400 nested = type_token in self.NESTED_TYPE_TOKENS 4401 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4402 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4403 expressions = None 4404 maybe_func = False 4405 4406 if self._match(TokenType.L_PAREN): 4407 if is_struct: 4408 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4409 elif nested: 4410 expressions = self._parse_csv( 4411 lambda: self._parse_types( 4412 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4413 ) 4414 ) 4415 elif type_token in self.ENUM_TYPE_TOKENS: 4416 expressions = self._parse_csv(self._parse_equality) 4417 elif is_aggregate: 4418 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4419 any_token=False, tokens=(TokenType.VAR,) 4420 ) 4421 if not func_or_ident or not self._match(TokenType.COMMA): 4422 return None 4423 expressions = self._parse_csv( 4424 lambda: self._parse_types( 4425 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4426 ) 4427 ) 4428 expressions.insert(0, func_or_ident) 4429 else: 4430 expressions = self._parse_csv(self._parse_type_size) 4431 4432 # 
https://docs.snowflake.com/en/sql-reference/data-types-vector 4433 if type_token == TokenType.VECTOR and len(expressions) == 2: 4434 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4435 4436 if not expressions or not self._match(TokenType.R_PAREN): 4437 self._retreat(index) 4438 return None 4439 4440 maybe_func = True 4441 4442 values: t.Optional[t.List[exp.Expression]] = None 4443 4444 if nested and self._match(TokenType.LT): 4445 if is_struct: 4446 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4447 else: 4448 expressions = self._parse_csv( 4449 lambda: self._parse_types( 4450 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4451 ) 4452 ) 4453 4454 if not self._match(TokenType.GT): 4455 self.raise_error("Expecting >") 4456 4457 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4458 values = self._parse_csv(self._parse_assignment) 4459 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4460 4461 if type_token in self.TIMESTAMPS: 4462 if self._match_text_seq("WITH", "TIME", "ZONE"): 4463 maybe_func = False 4464 tz_type = ( 4465 exp.DataType.Type.TIMETZ 4466 if type_token in self.TIMES 4467 else exp.DataType.Type.TIMESTAMPTZ 4468 ) 4469 this = exp.DataType(this=tz_type, expressions=expressions) 4470 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4471 maybe_func = False 4472 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4473 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4474 maybe_func = False 4475 elif type_token == TokenType.INTERVAL: 4476 unit = self._parse_var(upper=True) 4477 if unit: 4478 if self._match_text_seq("TO"): 4479 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4480 4481 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4482 else: 4483 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4484 4485 if 
maybe_func and check_func: 4486 index2 = self._index 4487 peek = self._parse_string() 4488 4489 if not peek: 4490 self._retreat(index) 4491 return None 4492 4493 self._retreat(index2) 4494 4495 if not this: 4496 if self._match_text_seq("UNSIGNED"): 4497 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4498 if not unsigned_type_token: 4499 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4500 4501 type_token = unsigned_type_token or type_token 4502 4503 this = exp.DataType( 4504 this=exp.DataType.Type[type_token.value], 4505 expressions=expressions, 4506 nested=nested, 4507 prefix=prefix, 4508 ) 4509 4510 # Empty arrays/structs are allowed 4511 if values is not None: 4512 cls = exp.Struct if is_struct else exp.Array 4513 this = exp.cast(cls(expressions=values), this, copy=False) 4514 4515 elif expressions: 4516 this.set("expressions", expressions) 4517 4518 # https://materialize.com/docs/sql/types/list/#type-name 4519 while self._match(TokenType.LIST): 4520 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4521 4522 index = self._index 4523 4524 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4525 matched_array = self._match(TokenType.ARRAY) 4526 4527 while self._curr: 4528 matched_l_bracket = self._match(TokenType.L_BRACKET) 4529 if not matched_l_bracket and not matched_array: 4530 break 4531 4532 matched_array = False 4533 values = self._parse_csv(self._parse_assignment) or None 4534 if values and not schema: 4535 self._retreat(index) 4536 break 4537 4538 this = exp.DataType( 4539 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4540 ) 4541 self._match(TokenType.R_BRACKET) 4542 4543 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4544 converter = self.TYPE_CONVERTERS.get(this.this) 4545 if converter: 4546 this = converter(t.cast(exp.DataType, this)) 4547 4548 return this 4549 4550 def _parse_struct_types(self, 
type_required: bool = False) -> t.Optional[exp.Expression]: 4551 index = self._index 4552 4553 if ( 4554 self._curr 4555 and self._next 4556 and self._curr.token_type in self.TYPE_TOKENS 4557 and self._next.token_type in self.TYPE_TOKENS 4558 ): 4559 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4560 # type token. Without this, the list will be parsed as a type and we'll eventually crash 4561 this = self._parse_id_var() 4562 else: 4563 this = ( 4564 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4565 or self._parse_id_var() 4566 ) 4567 4568 self._match(TokenType.COLON) 4569 4570 if ( 4571 type_required 4572 and not isinstance(this, exp.DataType) 4573 and not self._match_set(self.TYPE_TOKENS, advance=False) 4574 ): 4575 self._retreat(index) 4576 return self._parse_types() 4577 4578 return self._parse_column_def(this) 4579 4580 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4581 if not self._match_text_seq("AT", "TIME", "ZONE"): 4582 return this 4583 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4584 4585 def _parse_column(self) -> t.Optional[exp.Expression]: 4586 this = self._parse_column_reference() 4587 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4588 4589 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4590 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4591 4592 return column 4593 4594 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4595 this = self._parse_field() 4596 if ( 4597 not this 4598 and self._match(TokenType.VALUES, advance=False) 4599 and self.VALUES_FOLLOWED_BY_PAREN 4600 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4601 ): 4602 this = self._parse_id_var() 4603 4604 if isinstance(this, exp.Identifier): 4605 # We bubble up comments from the Identifier to the Column 4606 this = self.expression(exp.Column, 
comments=this.pop_comments(), this=this) 4607 4608 return this 4609 4610 def _parse_colon_as_variant_extract( 4611 self, this: t.Optional[exp.Expression] 4612 ) -> t.Optional[exp.Expression]: 4613 casts = [] 4614 json_path = [] 4615 4616 while self._match(TokenType.COLON): 4617 start_index = self._index 4618 4619 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4620 path = self._parse_column_ops( 4621 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4622 ) 4623 4624 # The cast :: operator has a lower precedence than the extraction operator :, so 4625 # we rearrange the AST appropriately to avoid casting the JSON path 4626 while isinstance(path, exp.Cast): 4627 casts.append(path.to) 4628 path = path.this 4629 4630 if casts: 4631 dcolon_offset = next( 4632 i 4633 for i, t in enumerate(self._tokens[start_index:]) 4634 if t.token_type == TokenType.DCOLON 4635 ) 4636 end_token = self._tokens[start_index + dcolon_offset - 1] 4637 else: 4638 end_token = self._prev 4639 4640 if path: 4641 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4642 4643 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4644 # Databricks transforms it back to the colon/dot notation 4645 if json_path: 4646 this = self.expression( 4647 exp.JSONExtract, 4648 this=this, 4649 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4650 variant_extract=True, 4651 ) 4652 4653 while casts: 4654 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4655 4656 return this 4657 4658 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4659 return self._parse_types() 4660 4661 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4662 this = self._parse_bracket(this) 4663 4664 while self._match_set(self.COLUMN_OPERATORS): 4665 op_token = self._prev.token_type 4666 op = 
self.COLUMN_OPERATORS.get(op_token) 4667 4668 if op_token == TokenType.DCOLON: 4669 field = self._parse_dcolon() 4670 if not field: 4671 self.raise_error("Expected type") 4672 elif op and self._curr: 4673 field = self._parse_column_reference() 4674 else: 4675 field = self._parse_field(any_token=True, anonymous_func=True) 4676 4677 if isinstance(field, exp.Func) and this: 4678 # bigquery allows function calls like x.y.count(...) 4679 # SAFE.SUBSTR(...) 4680 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4681 this = exp.replace_tree( 4682 this, 4683 lambda n: ( 4684 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4685 if n.table 4686 else n.this 4687 ) 4688 if isinstance(n, exp.Column) 4689 else n, 4690 ) 4691 4692 if op: 4693 this = op(self, this, field) 4694 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4695 this = self.expression( 4696 exp.Column, 4697 this=field, 4698 table=this.this, 4699 db=this.args.get("table"), 4700 catalog=this.args.get("db"), 4701 ) 4702 else: 4703 this = self.expression(exp.Dot, this=this, expression=field) 4704 4705 this = self._parse_bracket(this) 4706 4707 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4708 4709 def _parse_primary(self) -> t.Optional[exp.Expression]: 4710 if self._match_set(self.PRIMARY_PARSERS): 4711 token_type = self._prev.token_type 4712 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4713 4714 if token_type == TokenType.STRING: 4715 expressions = [primary] 4716 while self._match(TokenType.STRING): 4717 expressions.append(exp.Literal.string(self._prev.text)) 4718 4719 if len(expressions) > 1: 4720 return self.expression(exp.Concat, expressions=expressions) 4721 4722 return primary 4723 4724 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4725 return exp.Literal.number(f"0.{self._prev.text}") 4726 4727 if self._match(TokenType.L_PAREN): 4728 comments = 
self._prev_comments 4729 query = self._parse_select() 4730 4731 if query: 4732 expressions = [query] 4733 else: 4734 expressions = self._parse_expressions() 4735 4736 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4737 4738 if not this and self._match(TokenType.R_PAREN, advance=False): 4739 this = self.expression(exp.Tuple) 4740 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4741 this = self._parse_subquery(this=this, parse_alias=False) 4742 elif isinstance(this, exp.Subquery): 4743 this = self._parse_subquery( 4744 this=self._parse_set_operations(this), parse_alias=False 4745 ) 4746 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4747 this = self.expression(exp.Tuple, expressions=expressions) 4748 else: 4749 this = self.expression(exp.Paren, this=this) 4750 4751 if this: 4752 this.add_comments(comments) 4753 4754 self._match_r_paren(expression=this) 4755 return this 4756 4757 return None 4758 4759 def _parse_field( 4760 self, 4761 any_token: bool = False, 4762 tokens: t.Optional[t.Collection[TokenType]] = None, 4763 anonymous_func: bool = False, 4764 ) -> t.Optional[exp.Expression]: 4765 if anonymous_func: 4766 field = ( 4767 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4768 or self._parse_primary() 4769 ) 4770 else: 4771 field = self._parse_primary() or self._parse_function( 4772 anonymous=anonymous_func, any_token=any_token 4773 ) 4774 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4775 4776 def _parse_function( 4777 self, 4778 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4779 anonymous: bool = False, 4780 optional_parens: bool = True, 4781 any_token: bool = False, 4782 ) -> t.Optional[exp.Expression]: 4783 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4784 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4785 fn_syntax = False 4786 if ( 4787 self._match(TokenType.L_BRACE, advance=False) 4788 and self._next 4789 
and self._next.text.upper() == "FN" 4790 ): 4791 self._advance(2) 4792 fn_syntax = True 4793 4794 func = self._parse_function_call( 4795 functions=functions, 4796 anonymous=anonymous, 4797 optional_parens=optional_parens, 4798 any_token=any_token, 4799 ) 4800 4801 if fn_syntax: 4802 self._match(TokenType.R_BRACE) 4803 4804 return func 4805 4806 def _parse_function_call( 4807 self, 4808 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4809 anonymous: bool = False, 4810 optional_parens: bool = True, 4811 any_token: bool = False, 4812 ) -> t.Optional[exp.Expression]: 4813 if not self._curr: 4814 return None 4815 4816 comments = self._curr.comments 4817 token_type = self._curr.token_type 4818 this = self._curr.text 4819 upper = this.upper() 4820 4821 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4822 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4823 self._advance() 4824 return self._parse_window(parser(self)) 4825 4826 if not self._next or self._next.token_type != TokenType.L_PAREN: 4827 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4828 self._advance() 4829 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4830 4831 return None 4832 4833 if any_token: 4834 if token_type in self.RESERVED_TOKENS: 4835 return None 4836 elif token_type not in self.FUNC_TOKENS: 4837 return None 4838 4839 self._advance(2) 4840 4841 parser = self.FUNCTION_PARSERS.get(upper) 4842 if parser and not anonymous: 4843 this = parser(self) 4844 else: 4845 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4846 4847 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4848 this = self.expression(subquery_predicate, this=self._parse_select()) 4849 self._match_r_paren() 4850 return this 4851 4852 if functions is None: 4853 functions = self.FUNCTIONS 4854 4855 function = functions.get(upper) 4856 4857 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4858 args = self._parse_csv(lambda: 
self._parse_lambda(alias=alias)) 4859 4860 if alias: 4861 args = self._kv_to_prop_eq(args) 4862 4863 if function and not anonymous: 4864 if "dialect" in function.__code__.co_varnames: 4865 func = function(args, dialect=self.dialect) 4866 else: 4867 func = function(args) 4868 4869 func = self.validate_expression(func, args) 4870 if not self.dialect.NORMALIZE_FUNCTIONS: 4871 func.meta["name"] = this 4872 4873 this = func 4874 else: 4875 if token_type == TokenType.IDENTIFIER: 4876 this = exp.Identifier(this=this, quoted=True) 4877 this = self.expression(exp.Anonymous, this=this, expressions=args) 4878 4879 if isinstance(this, exp.Expression): 4880 this.add_comments(comments) 4881 4882 self._match_r_paren(this) 4883 return self._parse_window(this) 4884 4885 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4886 transformed = [] 4887 4888 for e in expressions: 4889 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4890 if isinstance(e, exp.Alias): 4891 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4892 4893 if not isinstance(e, exp.PropertyEQ): 4894 e = self.expression( 4895 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4896 ) 4897 4898 if isinstance(e.this, exp.Column): 4899 e.this.replace(e.this.this) 4900 4901 transformed.append(e) 4902 4903 return transformed 4904 4905 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4906 return self._parse_column_def(self._parse_id_var()) 4907 4908 def _parse_user_defined_function( 4909 self, kind: t.Optional[TokenType] = None 4910 ) -> t.Optional[exp.Expression]: 4911 this = self._parse_id_var() 4912 4913 while self._match(TokenType.DOT): 4914 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4915 4916 if not self._match(TokenType.L_PAREN): 4917 return this 4918 4919 expressions = self._parse_csv(self._parse_function_parameter) 4920 self._match_r_paren() 4921 return self.expression( 4922 
exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4923 ) 4924 4925 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4926 literal = self._parse_primary() 4927 if literal: 4928 return self.expression(exp.Introducer, this=token.text, expression=literal) 4929 4930 return self.expression(exp.Identifier, this=token.text) 4931 4932 def _parse_session_parameter(self) -> exp.SessionParameter: 4933 kind = None 4934 this = self._parse_id_var() or self._parse_primary() 4935 4936 if this and self._match(TokenType.DOT): 4937 kind = this.name 4938 this = self._parse_var() or self._parse_primary() 4939 4940 return self.expression(exp.SessionParameter, this=this, kind=kind) 4941 4942 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4943 return self._parse_id_var() 4944 4945 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4946 index = self._index 4947 4948 if self._match(TokenType.L_PAREN): 4949 expressions = t.cast( 4950 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4951 ) 4952 4953 if not self._match(TokenType.R_PAREN): 4954 self._retreat(index) 4955 else: 4956 expressions = [self._parse_lambda_arg()] 4957 4958 if self._match_set(self.LAMBDAS): 4959 return self.LAMBDAS[self._prev.token_type](self, expressions) 4960 4961 self._retreat(index) 4962 4963 this: t.Optional[exp.Expression] 4964 4965 if self._match(TokenType.DISTINCT): 4966 this = self.expression( 4967 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4968 ) 4969 else: 4970 this = self._parse_select_or_expression(alias=alias) 4971 4972 return self._parse_limit( 4973 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4974 ) 4975 4976 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4977 index = self._index 4978 if not self._match(TokenType.L_PAREN): 4979 return this 4980 4981 # Disambiguate between schema and 
def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse the type and constraints that follow a column name.

    Args:
        this: The already parsed column name. An `exp.Column` is unwrapped to
            its inner node, since a definition names an identifier rather
            than referencing a column.

    Returns:
        An `exp.ColumnDef`, or the (unwrapped) name unchanged when neither a
        type nor any constraint follows it.
    """
    name = this.this if isinstance(this, exp.Column) else this

    data_type = self._parse_types(schema=True)

    if self._match_text_seq("FOR", "ORDINALITY"):
        return self.expression(exp.ColumnDef, this=name, ordinality=True)

    column_constraints: t.List[exp.Expression] = []

    is_computed = (not data_type and self._match(TokenType.ALIAS)) or self._match_texts(
        ("ALIAS", "MATERIALIZED")
    )

    if is_computed:
        # Read the keyword first: the sub-parsers below move `self._prev`.
        materialized = self._prev.text.upper() == "MATERIALIZED"
        computed = self._parse_assignment()
        persisted = materialized or self._match_text_seq("PERSISTED")
        not_null = self._match_pair(TokenType.NOT, TokenType.NULL)
        column_constraints.append(
            self.expression(
                exp.ComputedColumnConstraint,
                this=computed,
                persisted=persisted,
                not_null=not_null,
            )
        )
    elif data_type and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
        # Only the alias token is consumed here; the paren is left for the field parser.
        self._match(TokenType.ALIAS)
        transform = self._parse_field()
        column_constraints.append(
            self.expression(exp.TransformColumnConstraint, this=transform)
        )

    # Collect constraints until the constraint parser yields nothing.
    parsed = self._parse_column_constraint()
    while parsed:
        column_constraints.append(parsed)
        parsed = self._parse_column_constraint()

    if not data_type and not column_constraints:
        return name

    return self.expression(
        exp.ColumnDef, this=name, kind=data_type, constraints=column_constraints
    )
def _parse_generated_as_identity(
    self,
) -> (
    exp.GeneratedAsIdentityColumnConstraint
    | exp.ComputedColumnConstraint
    | exp.GeneratedAsRowColumnConstraint
):
    """Parse what follows GENERATED in a column constraint.

    Handles `BY DEFAULT [ON NULL]` and `[ALWAYS]`, then either a
    `ROW START | END [HIDDEN]` clause or an optional `IDENTITY` keyword with
    a parenthesized option list (START WITH, INCREMENT BY, MINVALUE,
    MAXVALUE, [NO] CYCLE). Without `IDENTITY`, the parenthesized content is
    parsed as an expression and stored under `expression`.

    Returns:
        An `exp.GeneratedAsRowColumnConstraint` for the ROW form, otherwise
        the `exp.GeneratedAsIdentityColumnConstraint` with the parsed options.
    """
    if self._match_text_seq("BY", "DEFAULT"):
        on_null = self._match_pair(TokenType.ON, TokenType.NULL)
        constraint = self.expression(
            exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
        )
    else:
        self._match_text_seq("ALWAYS")
        constraint = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)

    if self._match_text_seq("ROW"):
        is_start = self._match_text_seq("START")
        if not is_start:
            self._match(TokenType.END)
        is_hidden = self._match_text_seq("HIDDEN")
        return self.expression(
            exp.GeneratedAsRowColumnConstraint, start=is_start, hidden=is_hidden
        )

    has_identity = self._match_text_seq("IDENTITY")

    if not self._match(TokenType.L_PAREN):
        return constraint

    if self._match(TokenType.START_WITH):
        constraint.set("start", self._parse_bitwise())

    # Keyword-introduced options, tried in this fixed order.
    keyword_options = (
        ("increment", ("INCREMENT", "BY")),
        ("minvalue", ("MINVALUE",)),
        ("maxvalue", ("MAXVALUE",)),
    )
    for arg_key, keywords in keyword_options:
        if self._match_text_seq(*keywords):
            constraint.set(arg_key, self._parse_bitwise())

    if self._match_text_seq("CYCLE"):
        constraint.set("cycle", True)
    elif self._match_text_seq("NO", "CYCLE"):
        constraint.set("cycle", False)

    if not has_identity:
        constraint.set("expression", self._parse_range())
    elif not constraint.args.get("start") and self._match(TokenType.NUMBER, advance=False):
        # Positional form: a bare number list is read as start[, increment].
        positional = self._parse_csv(self._parse_bitwise)
        constraint.set("start", seq_get(positional, 0))
        constraint.set("increment", seq_get(positional, 1))

    self._match_r_paren()

    return constraint
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """Parse a constraint that is not introduced by the CONSTRAINT keyword.

    Args:
        constraints: Keywords accepted at this position. When empty or
            omitted, every key of `self.CONSTRAINT_PARSERS` is accepted.

    Returns:
        The result of the matching constraint parser, or None when the
        current token is an IDENTIFIER token or no accepted keyword matches.
    """
    # An IDENTIFIER token is never treated as a constraint keyword; it is
    # only peeked at, not consumed.
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    accepted = constraints or self.CONSTRAINT_PARSERS
    if not self._match_texts(accepted):
        return None

    keyword = self._prev.text.upper()
    parsers = self.CONSTRAINT_PARSERS

    # A caller-supplied keyword may have no registered parser.
    if keyword not in parsers:
        self.raise_error(f"No parser found for schema constraint {keyword}.")

    return parsers[keyword](self)
def _parse_foreign_key(self) -> exp.ForeignKey:
    """Parse the body of a FOREIGN KEY constraint.

    Reads the wrapped list of key columns, an optional REFERENCES clause and
    any number of `ON DELETE | UPDATE <action>` clauses. Each action is
    stored on the resulting node under the lower-cased event name
    (`delete` / `update`).

    Returns:
        The `exp.ForeignKey` node.

    Raises:
        ParseError: Through `raise_error`, when ON is not followed by DELETE
            or UPDATE, or when SET is not followed by NULL or DEFAULT.
    """
    expressions = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    options: t.Dict[str, str] = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            # Previously the match result was ignored, so input such as
            # `ON DELETE SET <garbage>` silently produced the action "SET SET".
            if not self._match_set((TokenType.NULL, TokenType.DEFAULT)):
                self.raise_error("Expected NULL or DEFAULT")
            action = "SET " + self._prev.text.upper()
        else:
            # Any other action (e.g. a single keyword) is taken verbatim
            # from the next token.
            self._advance()
            action = self._prev.text.upper()

        options[kind] = action

    return self.expression(
        exp.ForeignKey,
        expressions=expressions,
        reference=reference,
        **options,  # type: ignore
    )
def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap `this` in an `exp.Slice` when a colon follows it.

    Args:
        this: The expression parsed before the potential colon.

    Returns:
        `this` unchanged when the next token is not a colon; otherwise an
        `exp.Slice` whose `expression` is whatever is parsed after the colon.
    """
    if not self._match(TokenType.COLON):
        return this

    after_colon = self._parse_assignment()
    return self.expression(exp.Slice, this=this, expression=after_colon)
def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
    """Parse the arguments of a CAST-style call.

    Args:
        strict: Selects the node built on the default path: `exp.Cast` when
            true, `exp.TryCast` otherwise.
        safe: Forwarded as the `safe` argument of the resulting node.

    Returns:
        - `exp.CastToStrType` when the operand is followed by a comma instead
          of the alias token;
        - `exp.StrToDate` / `exp.StrToTime` when a FORMAT clause is present
          and the target type is one of `exp.DataType.TEMPORAL_TYPES`;
        - `exp.Cast` / `exp.TryCast` otherwise.
    """
    this = self._parse_assignment()

    if not self._match(TokenType.ALIAS):
        # Comma form: the second argument is parsed as a string, not as a type.
        if self._match(TokenType.COMMA):
            return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

        self.raise_error("Expected AS after CAST")

    fmt = None
    to = self._parse_types()

    if self._match(TokenType.FORMAT):
        fmt_string = self._parse_string()
        # The format may carry a time zone suffix, which is parsed around the string.
        fmt = self._parse_at_time_zone(fmt_string)

        if not to:
            to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
        if to.this in exp.DataType.TEMPORAL_TYPES:
            # A formatted cast to a temporal type becomes a string-to-date/time
            # conversion, with the format translated through the dialect's mappings.
            this = self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt_string.this if fmt_string else "",
                        self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                        self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                    )
                ),
                safe=safe,
            )

            # Only StrToTime receives the zone here.
            if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                this.set("zone", fmt.args["zone"])
            return this
    elif not to:
        self.raise_error("Expected TYPE after CAST")
    elif isinstance(to, exp.Identifier):
        # A bare identifier as target type is built as a user-defined type.
        to = exp.DataType.build(to.name, udt=True)
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

    return self.expression(
        exp.Cast if strict else exp.TryCast,
        this=this,
        to=to,
        format=fmt,
        safe=safe,
        action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
    )
def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    Parse either of the two common DECODE variants:

    - DECODE(bin, charset)
    - DECODE(expression, search, result [, search, result] ... [, default])

    Calls with fewer than three arguments are treated as the first variant.
    The second variant is always turned into a CASE expression. A NULL search
    value is compared with `IS NULL` rather than `=`, and a non-literal search
    value additionally matches when both it and the subject are NULL.

    Returns:
        An `exp.Decode`, an `exp.Case`, or None when the subject or any
        search/result operand is missing.
    """
    args = self._parse_csv(self._parse_assignment)

    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    subject, *operands = args
    if not subject:
        return None

    branches = []
    # Pair up (search, result); a trailing unpaired operand is the default.
    for search, result in zip(operands[::2], operands[1::2]):
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            condition = exp.EQ(this=subject.copy(), expression=search)
        elif isinstance(search, exp.Null):
            condition = exp.Is(this=subject.copy(), expression=exp.Null())
        else:
            equals = exp.EQ(this=subject.copy(), expression=search)
            both_null = exp.and_(
                exp.Is(this=subject.copy(), expression=exp.Null()),
                exp.Is(this=search.copy(), expression=exp.Null()),
                copy=False,
            )
            condition = exp.or_(equals, both_null, copy=False)

        branches.append(exp.If(this=condition, true=result))

    has_default = len(operands) % 2 == 1
    return exp.Case(ifs=branches, default=operands[-1] if has_default else None)
5582 return this 5583 5584 return self.expression(exp.FormatJson, this=this) 5585 5586 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5587 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5588 for value in values: 5589 if self._match_text_seq(value, "ON", on): 5590 return f"{value} ON {on}" 5591 5592 return None 5593 5594 @t.overload 5595 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5596 5597 @t.overload 5598 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 5599 5600 def _parse_json_object(self, agg=False): 5601 star = self._parse_star() 5602 expressions = ( 5603 [star] 5604 if star 5605 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5606 ) 5607 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5608 5609 unique_keys = None 5610 if self._match_text_seq("WITH", "UNIQUE"): 5611 unique_keys = True 5612 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5613 unique_keys = False 5614 5615 self._match_text_seq("KEYS") 5616 5617 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5618 self._parse_type() 5619 ) 5620 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5621 5622 return self.expression( 5623 exp.JSONObjectAgg if agg else exp.JSONObject, 5624 expressions=expressions, 5625 null_handling=null_handling, 5626 unique_keys=unique_keys, 5627 return_type=return_type, 5628 encoding=encoding, 5629 ) 5630 5631 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5632 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5633 if not self._match_text_seq("NESTED"): 5634 this = self._parse_id_var() 5635 kind = self._parse_types(allow_identifiers=False) 5636 nested = None 5637 else: 5638 this = None 5639 kind = None 5640 nested = True 5641 5642 path = self._match_text_seq("PATH") and self._parse_string() 5643 nested_schema = nested and self._parse_json_schema() 5644 5645 
def _parse_match_against(self) -> exp.MatchAgainst:
    """Parse the remainder of `MATCH (cols) AGAINST (string [modifier])`.

    Reads the comma-separated column list, skips the `) AGAINST (` token
    sequence, parses the search string, and then recognises an optional
    search modifier.

    Returns:
        An `exp.MatchAgainst` with the search string as `this`, the columns
        as `expressions` and the modifier text (or None) as `modifier`.
    """
    columns = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    search = self._parse_string()

    modifier: t.Optional[str] = None
    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        # Natural language mode may additionally request query expansion.
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier += " WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"

    return self.expression(
        exp.MatchAgainst, this=search, expressions=columns, modifier=modifier
    )
def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """Parse the arguments of a POSITION-style function.

    Supports the `needle IN haystack` form as well as the comma-separated
    form with an optional third (position) argument.

    Args:
        haystack_first: For the comma-separated form, whether the string
            being searched comes before the substring.

    Returns:
        An `exp.StrPosition` with the searched string as `this` and the
        substring as `substr`.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        searched = self._parse_bitwise()
        return self.expression(exp.StrPosition, this=searched, substr=seq_get(args, 0))

    # Pick which positional argument plays which role.
    haystack_at, needle_at = (0, 1) if haystack_first else (1, 0)

    return self.expression(
        exp.StrPosition,
        this=seq_get(args, haystack_at),
        substr=seq_get(args, needle_at),
        position=seq_get(args, 2),
    )
def _parse_trim(self) -> exp.Trim:
    """Parse the arguments of a TRIM call.

    Grammar references:
    https://www.w3resource.com/sql/character-functions/trim.php
    https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    An optional keyword from `self.TRIM_TYPES` is read first, then one
    operand, optionally followed by FROM or a comma and a second operand,
    and finally an optional COLLATE clause.

    Returns:
        An `exp.Trim` node. The two operands are swapped when they were
        separated by FROM or when `self.TRIM_PATTERN_FIRST` is set.
    """
    position = self._prev.text.upper() if self._match_texts(self.TRIM_TYPES) else None

    first = self._parse_bitwise()
    target, pattern = first, None

    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        # Decide on the swap before parsing further: parsing moves `self._prev`.
        swap = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
        second = self._parse_bitwise()
        target, pattern = (second, first) if swap else (first, second)

    collation = self._parse_bitwise() if self._match(TokenType.COLLATE) else None

    return self.expression(
        exp.Trim, this=target, position=position, expression=pattern, collation=collation
    )
def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """Parse the optional clauses that may follow a function call, up to a window.

    In order, this handles `FILTER (WHERE ...)`, `WITHIN GROUP (ORDER BY ...)`,
    a trailing `IGNORE | RESPECT NULLS`, and finally the window itself: either
    a reference to a named window or a parenthesized specification.

    Args:
        this: The expression the clauses apply to.
        alias: When true, parse a named window definition (`x AS (...)`)
            instead of requiring one of `self.WINDOW_BEFORE_PAREN_TOKENS`.

    Returns:
        `this` (possibly wrapped in Filter / WithinGroup / IgnoreNulls /
        RespectNulls) when no window follows, otherwise an `exp.Window`.
    """
    func = this
    # Comments attached to the function are moved onto the window node below.
    comments = func.comments if isinstance(func, exp.Expression) else None

    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        self._match(TokenType.WHERE)
        this = self.expression(
            exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
        )
        self._match_r_paren()

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=order)

    # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER.
    # Some dialects implement it and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # _parse_lambda handles the in-argument form:
    # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The code below handles the trailing form:
    # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    if isinstance(this, exp.AggFunc):
        ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

        # Hoist a nested IGNORE/RESPECT NULLS node so that it wraps the aggregate.
        if ignore_respect and ignore_respect is not this:
            ignore_respect.replace(ignore_respect.this)
            this = self.expression(ignore_respect.__class__, this=this)

    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        # No window keyword follows: return what has been parsed so far.
        return this
    else:
        over = self._prev.text.upper()

    if comments and isinstance(func, exp.Expression):
        func.pop_comments()

    if not self._match(TokenType.L_PAREN):
        # No parenthesized specification: the next identifier is stored as the alias.
        return self.expression(
            exp.Window,
            comments=comments,
            this=this,
            alias=self._parse_id_var(False),
            over=over,
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # `first` is truthy after FIRST, False after LAST, and falsy when neither is present.
    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition, order = self._parse_partition_and_order()
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
        )
    else:
        spec = None

    self._match_r_paren()

    window = self.expression(
        exp.Window,
        comments=comments,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=window_alias,
        over=over,
        first=first,
    )

    # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
    if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
        return self._parse_window(window, alias=alias)

    return window
column.pop_comments() 5979 5980 return this 5981 5982 def _parse_id_var( 5983 self, 5984 any_token: bool = True, 5985 tokens: t.Optional[t.Collection[TokenType]] = None, 5986 ) -> t.Optional[exp.Expression]: 5987 expression = self._parse_identifier() 5988 if not expression and ( 5989 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5990 ): 5991 quoted = self._prev.token_type == TokenType.STRING 5992 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5993 5994 return expression 5995 5996 def _parse_string(self) -> t.Optional[exp.Expression]: 5997 if self._match_set(self.STRING_PARSERS): 5998 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5999 return self._parse_placeholder() 6000 6001 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6002 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6003 6004 def _parse_number(self) -> t.Optional[exp.Expression]: 6005 if self._match_set(self.NUMERIC_PARSERS): 6006 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6007 return self._parse_placeholder() 6008 6009 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6010 if self._match(TokenType.IDENTIFIER): 6011 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6012 return self._parse_placeholder() 6013 6014 def _parse_var( 6015 self, 6016 any_token: bool = False, 6017 tokens: t.Optional[t.Collection[TokenType]] = None, 6018 upper: bool = False, 6019 ) -> t.Optional[exp.Expression]: 6020 if ( 6021 (any_token and self._advance_any()) 6022 or self._match(TokenType.VAR) 6023 or (self._match_set(tokens) if tokens else False) 6024 ): 6025 return self.expression( 6026 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6027 ) 6028 return self._parse_placeholder() 6029 6030 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6031 if self._curr and 
(ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6032 self._advance() 6033 return self._prev 6034 return None 6035 6036 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6037 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6038 6039 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6040 return self._parse_primary() or self._parse_var(any_token=True) 6041 6042 def _parse_null(self) -> t.Optional[exp.Expression]: 6043 if self._match_set(self.NULL_TOKENS): 6044 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6045 return self._parse_placeholder() 6046 6047 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6048 if self._match(TokenType.TRUE): 6049 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6050 if self._match(TokenType.FALSE): 6051 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6052 return self._parse_placeholder() 6053 6054 def _parse_star(self) -> t.Optional[exp.Expression]: 6055 if self._match(TokenType.STAR): 6056 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6057 return self._parse_placeholder() 6058 6059 def _parse_parameter(self) -> exp.Parameter: 6060 this = self._parse_identifier() or self._parse_primary_or_var() 6061 return self.expression(exp.Parameter, this=this) 6062 6063 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6064 if self._match_set(self.PLACEHOLDER_PARSERS): 6065 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6066 if placeholder: 6067 return placeholder 6068 self._advance(-1) 6069 return None 6070 6071 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6072 if not self._match_texts(keywords): 6073 return None 6074 if self._match(TokenType.L_PAREN, advance=False): 6075 return self._parse_wrapped_csv(self._parse_expression) 6076 6077 expression = self._parse_expression() 6078 return [expression] if expression else None 
def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[exp.Expression]:
    """Call `parse_method` repeatedly, once per `sep`-separated item, and collect the results.

    Results that are `None` are dropped from the returned list.
    """
    parse_result = parse_method()
    items = [parse_result] if parse_result is not None else []

    while self._match(sep):
        # Comments seen at the separator are attached to the item parsed before it
        self._add_comments(parse_result)
        parse_result = parse_method()
        if parse_result is not None:
            items.append(parse_result)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    """Parse a left-nested chain of binary expressions.

    `expressions` maps an operator token type to the expression class to build; operands
    on both sides are produced by `parse_method`.
    """
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
    """Parse a (by default mandatory) parenthesized, comma-separated list of `_parse_id_var` items."""
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[exp.Expression]:
    """Combine `_parse_wrapped` and `_parse_csv`: a `sep`-separated list inside parentheses."""
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    """Run `parse_method` between an opening and a closing parenthesis.

    When `optional` is false a missing "(" is reported through `raise_error`. The closing
    parenthesis is only required if an opening one was consumed.
    """
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    parse_result = parse_method()
    if wrapped:
        self._match_r_paren()
    return parse_result

def _parse_expressions(self) -> t.List[exp.Expression]:
    """Parse a comma-separated list of expressions."""
    return self._parse_csv(self._parse_expression)

def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a SELECT if possible, otherwise an expression followed by optional set operations.

    With `alias` set the fallback uses `_parse_expression`, otherwise `_parse_assignment`.
    """
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_assignment()
    )

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parse a nested SELECT (without subquery alias), its set operations and query modifiers."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction | exp.Command:
    """Build an `exp.Transaction` from an optional kind and comma-separated modes."""
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts(("TRANSACTION", "WORK"))

    modes = []
    while True:
        # A single mode may span several consecutive VAR tokens; they are joined with spaces
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse the remainder of a COMMIT / ROLLBACK statement.

    The statement kind is read from the previously consumed token. `savepoint` is only
    passed to `exp.Rollback` and `chain` only to `exp.Commit`.
    """
    chain = None
    savepoint = None
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts(("TRANSACTION", "WORK"))

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        # AND [NO] CHAIN: chain is False only when NO is present
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_refresh(self) -> exp.Refresh:
    """Build an `exp.Refresh` from an optional TABLE token and a string or table."""
    self._match(TokenType.TABLE)
    return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse `ADD [COLUMN] [IF NOT EXISTS] <field def> [FIRST | AFTER <column>]`.

    Returns `None` when the current token is not ADD.
    """
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_field_def()

    if expression:
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse a DROP action, filling in "COLUMN" when the parsed drop has no `kind` key."""
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Build an `exp.DropPartition` from a comma-separated list of partitions."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_alter_table_add(self) -> t.List[exp.Expression]:
    """Parse the actions of `ALTER TABLE ... ADD`: constraints or column definitions."""
    # Index of the token before the current one, used to rewind below
    # NOTE(review): presumably that token is the ADD keyword consumed by the caller - confirm
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
        return self._parse_csv(
            lambda: self.expression(
                exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
            )
        )

    self._retreat(index)
    if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
        return self._parse_wrapped_csv(self._parse_field_def, optional=True)
    return self._parse_wrapped_csv(self._parse_add_column, optional=True)

def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
    """Parse `ALTER TABLE ... ALTER ...`, producing an `exp.AlterColumn` by default.

    A keyword found in `ALTER_ALTER_PARSERS` is delegated to the registered parser.
    """
    if self._match_texts(self.ALTER_ALTER_PARSERS):
        return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

    # Many dialects support the ALTER [COLUMN] syntax, so if there is no
    # keyword after ALTER we default to parsing this statement
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
    if self._match(TokenType.COMMENT):
        return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
    if self._match_text_seq("DROP", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            drop=True,
            allow_null=True,
        )
    if self._match_text_seq("SET", "NOT", "NULL"):
        return self.expression(
            exp.AlterColumn,
            this=column,
            allow_null=False,
        )
    # Fallback: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
    self._match_text_seq("SET", "DATA")
    self._match_text_seq("TYPE")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_assignment(),
    )

def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
    """Build an `exp.AlterDistStyle` from ALL / EVEN / AUTO, or from `KEY DISTKEY <column>`."""
    if self._match_texts(("ALL", "EVEN", "AUTO")):
        return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

    self._match_text_seq("KEY", "DISTKEY")
    return self.expression(exp.AlterDistStyle, this=self._parse_column())

def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
    """Build an `exp.AlterSortKey` from a parenthesized id list or an AUTO / NONE keyword."""
    if compound:
        self._match_text_seq("SORTKEY")

    if self._match(TokenType.L_PAREN, advance=False):
        return self.expression(
            exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
        )

    # NOTE(review): the match result is not checked; the previous token's text is used either way
    self._match_texts(("AUTO", "NONE"))
    return self.expression(
        exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
    )

def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
    """Parse the actions of `ALTER TABLE ... DROP`: partitions or columns."""
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    # Not a partition drop: rewind so that _parse_drop_column re-reads from the saved index
    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
    """Parse `RENAME COLUMN <old> TO <new>` or `RENAME [TO] <table>`.

    Returns `None` for the column form when either column or the TO keyword is missing.
    """
    if self._match(TokenType.COLUMN):
        exists = self._parse_exists()
        old_column = self._parse_column()
        to = self._match_text_seq("TO")
        new_column = self._parse_column()

        if old_column is None or to is None or new_column is None:
            return None

        return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter_table_set(self) -> exp.AlterSet:
    """Build an `exp.AlterSet`, filling the argument that corresponds to the keyword found."""
    alter_set = self.expression(exp.AlterSet)

    if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
        "TABLE", "PROPERTIES"
    ):
        alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
    elif self._match_text_seq("FILESTREAM_ON", advance=False):
        alter_set.set("expressions", [self._parse_assignment()])
    elif self._match_texts(("LOGGED", "UNLOGGED")):
        alter_set.set("option", exp.var(self._prev.text.upper()))
    elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
        alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
    elif self._match_text_seq("LOCATION"):
        alter_set.set("location", self._parse_field())
    elif self._match_text_seq("ACCESS", "METHOD"):
        alter_set.set("access_method", self._parse_field())
    elif self._match_text_seq("TABLESPACE"):
        alter_set.set("tablespace", self._parse_field())
    elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
        alter_set.set("file_format", [self._parse_field()])
    elif self._match_text_seq("STAGE_FILE_FORMAT"):
        alter_set.set("file_format", self._parse_wrapped_options())
    elif self._match_text_seq("STAGE_COPY_OPTIONS"):
        alter_set.set("copy_options", self._parse_wrapped_options())
    elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
        alter_set.set("tag", self._parse_csv(self._parse_assignment))
    else:
        # Fallback: optional SERDE <field> followed by generic properties
        if self._match_text_seq("SERDE"):
            alter_set.set("serde", self._parse_field())

        alter_set.set("expressions", [self._parse_properties()])

    return alter_set

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse an ALTER statement.

    An `exp.AlterTable` is only returned when the statement targets a TABLE, a parser is
    registered in `ALTER_PARSERS` for the action keyword, that parser produced actions and
    all tokens were consumed. Everything else falls back to `_parse_as_command`.
    """
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")
    this = self._parse_table(schema=True)
    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    # Step over the action keyword so it can be looked up through self._prev
    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))
        options = self._parse_csv(self._parse_property)

        if not self._curr and actions:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
                only=only,
                options=options,
                cluster=cluster,
            )

    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Build an `exp.Merge` from `[INTO] <target> [USING] <source> [ON] <condition> <WHEN ...>`."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_assignment()

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=self._parse_when_matched(),
    )

def _parse_when_matched(self) -> t.List[exp.When]:
    """Parse consecutive `WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND ...] THEN ...` clauses."""
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # False for BY TARGET; otherwise the result of matching BY SOURCE (True or None)
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_assignment() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match_text_seq("VALUES") and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )
    return whens

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Dispatch to a parser found in `SHOW_PARSERS`, falling back to `_parse_as_command`."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse `<left> [= | TO] <right>` into an `exp.SetItem` wrapping an `exp.EQ`.

    Rewinds and returns `None` when there is no left-hand side, or when the delimiter is
    missing and `SET_REQUIRES_ASSIGNMENT_DELIMITER` is set.
    """
    index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_column()
    assignment_delimiter = self._match_texts(("=", "TO"))

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    # Columns and identifiers on the right-hand side are reduced to a plain Var of their name
    if isinstance(right, (exp.Column, exp.Identifier)):
        right = exp.var(right.name)

    this = self.expression(exp.EQ, this=left, expression=right)
    return self.expression(exp.SetItem, this=this, kind=kind)

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Build a TRANSACTION `exp.SetItem` from comma-separated transaction characteristics."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # "global" is a Python keyword, so it has to be passed through a dict
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item via `SET_PARSERS`, defaulting to a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a comma-separated list of SET items into an `exp.Set`.

    If tokens remain afterwards the parser rewinds and falls back to `_parse_as_command`.
    """
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> t.Optional[exp.Var]:
    """Parse a (possibly multi-word) option described by `options` into an `exp.Var`.

    `options` maps the upper-cased first keyword to its allowed continuations, each being
    either a single string or a sequence of strings. A keyword mapped to an empty sequence
    is accepted on its own. When nothing matches, the parser rewinds and returns `None`,
    after calling `raise_error` if `raise_unmatched` is set.
    """
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        # Reached when no continuation matched. This is only a failure if continuations
        # were required (non-empty) or the keyword is unknown (None)
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and return them as an `exp.Command`.

    The SQL text from `start` onwards is split at the length of `start.text`: the prefix
    becomes `this` and the rest becomes `expression`.
    """
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    self._warn_unsupported()
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Build an `exp.DictProperty` from `(<kind> [(<key> <value> ...)])`."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value could be parsed
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Build an `exp.DictRange` from `([MIN <min>] [MAX] <max>)`; `min` defaults to literal 0."""
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max = self._parse_var() or self._parse_primary()
    else:
        max = self._parse_var() or self._parse_primary()
        min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min, max=max)

def _parse_comprehension(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Comprehension]:
    """Build an `exp.Comprehension` from `<column> IN <iterator> [IF <condition>]`.

    Returns `None` after rewinding when the IN token is missing.
    """
    index = self._index
    expression = self._parse_column()
    if not self._match(TokenType.IN):
        # NOTE(review): rewinds one token before the starting index - presumably to also
        # give back a token consumed by the caller; confirm against the call site
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_assignment() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension,
        this=this,
        expression=expression,
        iterator=iterator,
        condition=condition,
    )

def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
    """Parse a heredoc, either from a HEREDOC_STRING token or from `$[tag$] ... $[tag$]` tokens.

    Returns `None` when the current token starts neither form. A missing closing delimiter
    is reported through `raise_error`.
    """
    if self._match(TokenType.HEREDOC_STRING):
        return self.expression(exp.Heredoc, this=self._prev.text)

    if not self._match_text_seq("$"):
        return None

    # Token texts that make up the opening delimiter, which must also close the heredoc
    tags = ["$"]
    tag_text = None

    if self._is_connected():
        self._advance()
        tags.append(self._prev.text.upper())
    else:
        self.raise_error("No closing $ found")

    if tags[-1] != "$":
        if self._is_connected() and self._match_text_seq("$"):
            tag_text = tags[-1]
            tags.append("$")
        else:
            self.raise_error("No closing $ found")

    heredoc_start = self._curr

    while self._curr:
        if self._match_text_seq(*tags, advance=False):
            this = self._find_sql(heredoc_start, self._prev)
            self._advance(len(tags))
            return self.expression(exp.Heredoc, this=this, tag=tag_text)

        self._advance()

    self.raise_error(f"No closing {''.join(tags)} found")
    return None

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Look up the parser whose (possibly multi-word) key matches the upcoming tokens.

    Tokens are consumed while walking `trie`. On a complete match the parser stored under
    the space-joined key is returned, leaving the matched tokens consumed. On failure the
    token index is restored and `None` is returned.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    while True:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """Return True if the current token has type `token_type`, else None.

    On a match the token is consumed when `advance` is true, and `_add_comments` is
    called with `expression`.
    """
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Return True if the current token's type is in `types` (consuming it if `advance`), else None."""
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Return True if the current and next tokens have the given types, else None.

    Both tokens are consumed on a match when `advance` is true.
    """
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a "(" token, reporting an error through `raise_error` if it is missing."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a ")" token, reporting an error through `raise_error` if it is missing."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Return True if the current token's upper-cased text is in `texts`, else None."""
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return None

def _match_text_seq(self, *texts, advance=True):
    """Return True if the upcoming tokens' upper-cased texts equal `texts` in order, else None.

    The token index is restored on a failed match, and also on a successful one when
    `advance` is false.
    """
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return None

    if not advance:
        self._retreat(index)

    return True

def _replace_lambda(
    self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Rewrite columns in `node` whose first part names one of the lambda `expressions`.

    Such a column is replaced by its dot form (when it has a table) or its bare identifier,
    wrapped in an `exp.Cast` when the matching expression carries a `to` argument.
    """
    if not node:
        return node

    # name -> type to cast to, or False when untyped (None is kept for "not a lambda argument")
    lambda_types = {e.name: e.args.get("to") or False for e in expressions}

    for column in node.find_all(exp.Column):
        typ = lambda_types.get(column.parts[0].name)
        if typ is not None:
            dot_or_id = column.to_dot() if column.table else column.this

            if typ:
                dot_or_id = self.expression(
                    exp.Cast,
                    this=dot_or_id,
                    to=typ,
                )

            parent = column.parent

            # If the column sits inside a chain of Dot nodes, replace the outermost Dot
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
    """Parse a TRUNCATE statement, or a TRUNCATE(...) function call.

    Falls back to `_parse_as_command` if tokens remain after the known clauses.
    """
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable,
        expressions=expressions,
        is_database=is_database,
        exists=exists,
        cluster=cluster,
        identity=identity,
        option=option,
        partition=partition,
    )

def _parse_with_operator(self) -> t.Optional[exp.Expression]:
    """Parse an ordered opclass, wrapped in `exp.WithOperator` if followed by `WITH <op>`."""
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True)

    return self.expression(exp.WithOperator, this=this, op=op)

def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
    """Parse `[=] [(] <option> [,] ... )` into a list of properties.

    Parsing stops at the first ")" or when the tokens run out.
    """
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    opts: t.List[t.Optional[exp.Expression]] = []
    while self._curr and not self._match(TokenType.R_PAREN):
        if self._match_text_seq("FORMAT_NAME", "="):
            # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
            # so we parse it separately to use _parse_field()
            prop = self.expression(
                exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
            )
            opts.append(prop)
        else:
            opts.append(self._parse_property())

        self._match(TokenType.COMMA)

    return opts

def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
    """Parse COPY parameters up to (but not including) a closing parenthesis."""
    # With sep=None the trailing self._match(sep) cannot match any token type
    sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

    options = []
    while self._curr and not self._match(TokenType.R_PAREN, advance=False):
        option = self._parse_var(any_token=True)
        prev = self._prev.text.upper()

        # Different dialects might separate options and values by white space, "=" and "AS"
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        param = self.expression(exp.CopyParameter, this=option)

        if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
            TokenType.L_PAREN, advance=False
        ):
            # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
            param.set("expressions", self._parse_wrapped_options())
        elif prev == "FILE_FORMAT":
            # T-SQL's external file format case
            param.set("expression", self._parse_field())
        else:
            param.set("expression", self._parse_unquoted_field())

        options.append(param)
        self._match(sep)

    return options

def _parse_credentials(self) -> t.Optional[exp.Credentials]:
    """Build an `exp.Credentials`, setting one argument per recognized clause that is present."""
    expr = self.expression(exp.Credentials)

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set("iam_role", self._parse_field())
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> t.Optional[exp.Expression]:
    """Parse a single file location of a COPY statement as a field."""
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    """Parse a COPY statement into an `exp.Copy`.

    `kind` is truthy when FROM is present or TO is absent. Falls back to
    `_parse_as_command` if tokens remain after the known clauses.
    """
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy,
        this=this,
        kind=kind,
        credentials=credentials,
        files=files,
        params=params,
    )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Stores the error-handling configuration, resolves the dialect and resets the parser.

    Args:
        error_level: The desired error level. Falls back to ErrorLevel.IMMEDIATE
            when no (truthy) value is given.
        error_message_context: The amount of context to capture from a query string
            when displaying the error message (in number of characters).
        max_errors: Maximum number of error messages to include in a raised ParseError.
        dialect: The dialect specification, resolved through `Dialect.get_or_raise`.
    """
    # Function-scope import, kept as in the original (presumably to avoid a circular import).
    from sqlglot.dialects import Dialect

    if not error_level:
        error_level = ErrorLevel.IMMEDIATE

    self.error_level = error_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)

    # Initialise the remaining per-parse state.
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # Looked up on the class (not the instance), so `_parse` receives the plain function.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, each one is tried in order and the result of the first
    successful parse is returned.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that parses.

    Raises:
        TypeError: If an expression type has no entry in `EXPRESSION_PARSERS`.
        ParseError: If parsing fails for every expression type; the individual
            failures are merged into it and the last one is chained as its cause.
    """
    failures = []

    for expression_type in ensure_list(expression_types):
        type_parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not type_parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            trees = self._parse(type_parser, raw_tokens, sql)
        except ParseError as failure:
            # Tag the failure with the type that was being attempted, then try the next one.
            failure.errors[0]["into_expression"] = expression_type
            failures.append(failure)
        else:
            return trees

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(failures),
    ) from failures[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.

    With ErrorLevel.WARN every recorded error is logged; with ErrorLevel.RAISE a single
    ParseError is raised when at least one error was recorded. Any other level is a no-op.
    """
    if self.error_level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if self.error_level == ErrorLevel.RAISE and self.errors:
        # The message is limited by `max_errors`; the full error list is still attached.
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Builds a ParseError for `message` and either raises it or records it,
    depending on the chosen error level setting.

    Args:
        message: The description of the problem.
        token: The token to report the error at. Falls back to the current token,
            then the previous one, then an empty string token.

    Raises:
        ParseError: If the error level is ErrorLevel.IMMEDIATE.
    """
    token = token or self._curr or self._prev or Token.string("")

    context_size = self.error_message_context
    highlight_start = token.start
    highlight_end = token.end + 1  # `end + 1` so the slice includes the token's last character

    start_context = self.sql[max(highlight_start - context_size, 0) : highlight_start]
    highlight = self.sql[highlight_start:highlight_end]
    end_context = self.sql[highlight_end : highlight_end + context_size]

    # NOTE(review): the second fragment's leading whitespace is copied as shown in a
    # whitespace-collapsed view of the file -- confirm against the original source.
    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            missing or empty, `self._add_comments` is called with the new instance instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression, after being passed through `validate_expression`.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression by reporting every message returned by its
    `error_messages` method through `raise_error`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression (the same object that was passed in).
    """
    # Validation is skipped entirely when errors are being ignored.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.