sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 STRUCT_TYPE_TOKENS = { 106 TokenType.NESTED, 107 TokenType.STRUCT, 108 } 109 110 NESTED_TYPE_TOKENS = { 111 TokenType.ARRAY, 112 TokenType.LOWCARDINALITY, 113 TokenType.MAP, 114 TokenType.NULLABLE, 115 *STRUCT_TYPE_TOKENS, 116 } 117 118 ENUM_TYPE_TOKENS = { 119 TokenType.ENUM, 120 TokenType.ENUM8, 121 TokenType.ENUM16, 122 } 123 124 TYPE_TOKENS = { 125 TokenType.BIT, 126 TokenType.BOOLEAN, 127 TokenType.TINYINT, 128 TokenType.UTINYINT, 129 TokenType.SMALLINT, 130 TokenType.USMALLINT, 131 TokenType.INT, 132 TokenType.UINT, 133 TokenType.BIGINT, 134 TokenType.UBIGINT, 135 TokenType.INT128, 136 
TokenType.UINT128, 137 TokenType.INT256, 138 TokenType.UINT256, 139 TokenType.MEDIUMINT, 140 TokenType.UMEDIUMINT, 141 TokenType.FIXEDSTRING, 142 TokenType.FLOAT, 143 TokenType.DOUBLE, 144 TokenType.CHAR, 145 TokenType.NCHAR, 146 TokenType.VARCHAR, 147 TokenType.NVARCHAR, 148 TokenType.TEXT, 149 TokenType.MEDIUMTEXT, 150 TokenType.LONGTEXT, 151 TokenType.MEDIUMBLOB, 152 TokenType.LONGBLOB, 153 TokenType.BINARY, 154 TokenType.VARBINARY, 155 TokenType.JSON, 156 TokenType.JSONB, 157 TokenType.INTERVAL, 158 TokenType.TINYBLOB, 159 TokenType.TINYTEXT, 160 TokenType.TIME, 161 TokenType.TIMETZ, 162 TokenType.TIMESTAMP, 163 TokenType.TIMESTAMPTZ, 164 TokenType.TIMESTAMPLTZ, 165 TokenType.DATETIME, 166 TokenType.DATETIME64, 167 TokenType.DATE, 168 TokenType.INT4RANGE, 169 TokenType.INT4MULTIRANGE, 170 TokenType.INT8RANGE, 171 TokenType.INT8MULTIRANGE, 172 TokenType.NUMRANGE, 173 TokenType.NUMMULTIRANGE, 174 TokenType.TSRANGE, 175 TokenType.TSMULTIRANGE, 176 TokenType.TSTZRANGE, 177 TokenType.TSTZMULTIRANGE, 178 TokenType.DATERANGE, 179 TokenType.DATEMULTIRANGE, 180 TokenType.DECIMAL, 181 TokenType.UDECIMAL, 182 TokenType.BIGDECIMAL, 183 TokenType.UUID, 184 TokenType.GEOGRAPHY, 185 TokenType.GEOMETRY, 186 TokenType.HLLSKETCH, 187 TokenType.HSTORE, 188 TokenType.PSEUDO_TYPE, 189 TokenType.SUPER, 190 TokenType.SERIAL, 191 TokenType.SMALLSERIAL, 192 TokenType.BIGSERIAL, 193 TokenType.XML, 194 TokenType.YEAR, 195 TokenType.UNIQUEIDENTIFIER, 196 TokenType.USERDEFINED, 197 TokenType.MONEY, 198 TokenType.SMALLMONEY, 199 TokenType.ROWVERSION, 200 TokenType.IMAGE, 201 TokenType.VARIANT, 202 TokenType.OBJECT, 203 TokenType.OBJECT_IDENTIFIER, 204 TokenType.INET, 205 TokenType.IPADDRESS, 206 TokenType.IPPREFIX, 207 TokenType.UNKNOWN, 208 TokenType.NULL, 209 *ENUM_TYPE_TOKENS, 210 *NESTED_TYPE_TOKENS, 211 } 212 213 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 214 TokenType.BIGINT: TokenType.UBIGINT, 215 TokenType.INT: TokenType.UINT, 216 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 217 
TokenType.SMALLINT: TokenType.USMALLINT, 218 TokenType.TINYINT: TokenType.UTINYINT, 219 TokenType.DECIMAL: TokenType.UDECIMAL, 220 } 221 222 SUBQUERY_PREDICATES = { 223 TokenType.ANY: exp.Any, 224 TokenType.ALL: exp.All, 225 TokenType.EXISTS: exp.Exists, 226 TokenType.SOME: exp.Any, 227 } 228 229 RESERVED_KEYWORDS = { 230 *Tokenizer.SINGLE_TOKENS.values(), 231 TokenType.SELECT, 232 } 233 234 DB_CREATABLES = { 235 TokenType.DATABASE, 236 TokenType.SCHEMA, 237 TokenType.TABLE, 238 TokenType.VIEW, 239 TokenType.DICTIONARY, 240 } 241 242 CREATABLES = { 243 TokenType.COLUMN, 244 TokenType.FUNCTION, 245 TokenType.INDEX, 246 TokenType.PROCEDURE, 247 *DB_CREATABLES, 248 } 249 250 # Tokens that can represent identifiers 251 ID_VAR_TOKENS = { 252 TokenType.VAR, 253 TokenType.ANTI, 254 TokenType.APPLY, 255 TokenType.ASC, 256 TokenType.AUTO_INCREMENT, 257 TokenType.BEGIN, 258 TokenType.CACHE, 259 TokenType.CASE, 260 TokenType.COLLATE, 261 TokenType.COMMAND, 262 TokenType.COMMENT, 263 TokenType.COMMIT, 264 TokenType.CONSTRAINT, 265 TokenType.DEFAULT, 266 TokenType.DELETE, 267 TokenType.DESC, 268 TokenType.DESCRIBE, 269 TokenType.DICTIONARY, 270 TokenType.DIV, 271 TokenType.END, 272 TokenType.EXECUTE, 273 TokenType.ESCAPE, 274 TokenType.FALSE, 275 TokenType.FIRST, 276 TokenType.FILTER, 277 TokenType.FORMAT, 278 TokenType.FULL, 279 TokenType.IS, 280 TokenType.ISNULL, 281 TokenType.INTERVAL, 282 TokenType.KEEP, 283 TokenType.KILL, 284 TokenType.LEFT, 285 TokenType.LOAD, 286 TokenType.MERGE, 287 TokenType.NATURAL, 288 TokenType.NEXT, 289 TokenType.OFFSET, 290 TokenType.ORDINALITY, 291 TokenType.OVERLAPS, 292 TokenType.OVERWRITE, 293 TokenType.PARTITION, 294 TokenType.PERCENT, 295 TokenType.PIVOT, 296 TokenType.PRAGMA, 297 TokenType.RANGE, 298 TokenType.REFERENCES, 299 TokenType.RIGHT, 300 TokenType.ROW, 301 TokenType.ROWS, 302 TokenType.SEMI, 303 TokenType.SET, 304 TokenType.SETTINGS, 305 TokenType.SHOW, 306 TokenType.TEMPORARY, 307 TokenType.TOP, 308 TokenType.TRUE, 309 
TokenType.UNIQUE, 310 TokenType.UNPIVOT, 311 TokenType.UPDATE, 312 TokenType.VOLATILE, 313 TokenType.WINDOW, 314 *CREATABLES, 315 *SUBQUERY_PREDICATES, 316 *TYPE_TOKENS, 317 *NO_PAREN_FUNCTIONS, 318 } 319 320 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 321 322 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 323 TokenType.ANTI, 324 TokenType.APPLY, 325 TokenType.ASOF, 326 TokenType.FULL, 327 TokenType.LEFT, 328 TokenType.LOCK, 329 TokenType.NATURAL, 330 TokenType.OFFSET, 331 TokenType.RIGHT, 332 TokenType.SEMI, 333 TokenType.WINDOW, 334 } 335 336 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 337 338 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 339 340 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 341 342 FUNC_TOKENS = { 343 TokenType.COLLATE, 344 TokenType.COMMAND, 345 TokenType.CURRENT_DATE, 346 TokenType.CURRENT_DATETIME, 347 TokenType.CURRENT_TIMESTAMP, 348 TokenType.CURRENT_TIME, 349 TokenType.CURRENT_USER, 350 TokenType.FILTER, 351 TokenType.FIRST, 352 TokenType.FORMAT, 353 TokenType.GLOB, 354 TokenType.IDENTIFIER, 355 TokenType.INDEX, 356 TokenType.ISNULL, 357 TokenType.ILIKE, 358 TokenType.INSERT, 359 TokenType.LIKE, 360 TokenType.MERGE, 361 TokenType.OFFSET, 362 TokenType.PRIMARY_KEY, 363 TokenType.RANGE, 364 TokenType.REPLACE, 365 TokenType.RLIKE, 366 TokenType.ROW, 367 TokenType.UNNEST, 368 TokenType.VAR, 369 TokenType.LEFT, 370 TokenType.RIGHT, 371 TokenType.DATE, 372 TokenType.DATETIME, 373 TokenType.TABLE, 374 TokenType.TIMESTAMP, 375 TokenType.TIMESTAMPTZ, 376 TokenType.WINDOW, 377 TokenType.XOR, 378 *TYPE_TOKENS, 379 *SUBQUERY_PREDICATES, 380 } 381 382 CONJUNCTION = { 383 TokenType.AND: exp.And, 384 TokenType.OR: exp.Or, 385 } 386 387 EQUALITY = { 388 TokenType.EQ: exp.EQ, 389 TokenType.NEQ: exp.NEQ, 390 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 391 } 392 393 COMPARISON = { 394 TokenType.GT: exp.GT, 395 TokenType.GTE: exp.GTE, 396 TokenType.LT: exp.LT, 397 TokenType.LTE: exp.LTE, 398 } 399 400 BITWISE = { 401 TokenType.AMP: 
exp.BitwiseAnd, 402 TokenType.CARET: exp.BitwiseXor, 403 TokenType.PIPE: exp.BitwiseOr, 404 TokenType.DPIPE: exp.DPipe, 405 } 406 407 TERM = { 408 TokenType.DASH: exp.Sub, 409 TokenType.PLUS: exp.Add, 410 TokenType.MOD: exp.Mod, 411 TokenType.COLLATE: exp.Collate, 412 } 413 414 FACTOR = { 415 TokenType.DIV: exp.IntDiv, 416 TokenType.LR_ARROW: exp.Distance, 417 TokenType.SLASH: exp.Div, 418 TokenType.STAR: exp.Mul, 419 } 420 421 TIMES = { 422 TokenType.TIME, 423 TokenType.TIMETZ, 424 } 425 426 TIMESTAMPS = { 427 TokenType.TIMESTAMP, 428 TokenType.TIMESTAMPTZ, 429 TokenType.TIMESTAMPLTZ, 430 *TIMES, 431 } 432 433 SET_OPERATIONS = { 434 TokenType.UNION, 435 TokenType.INTERSECT, 436 TokenType.EXCEPT, 437 } 438 439 JOIN_METHODS = { 440 TokenType.NATURAL, 441 TokenType.ASOF, 442 } 443 444 JOIN_SIDES = { 445 TokenType.LEFT, 446 TokenType.RIGHT, 447 TokenType.FULL, 448 } 449 450 JOIN_KINDS = { 451 TokenType.INNER, 452 TokenType.OUTER, 453 TokenType.CROSS, 454 TokenType.SEMI, 455 TokenType.ANTI, 456 } 457 458 JOIN_HINTS: t.Set[str] = set() 459 460 LAMBDAS = { 461 TokenType.ARROW: lambda self, expressions: self.expression( 462 exp.Lambda, 463 this=self._replace_lambda( 464 self._parse_conjunction(), 465 {node.name for node in expressions}, 466 ), 467 expressions=expressions, 468 ), 469 TokenType.FARROW: lambda self, expressions: self.expression( 470 exp.Kwarg, 471 this=exp.var(expressions[0].name), 472 expression=self._parse_conjunction(), 473 ), 474 } 475 476 COLUMN_OPERATORS = { 477 TokenType.DOT: None, 478 TokenType.DCOLON: lambda self, this, to: self.expression( 479 exp.Cast if self.STRICT_CAST else exp.TryCast, 480 this=this, 481 to=to, 482 ), 483 TokenType.ARROW: lambda self, this, path: self.expression( 484 exp.JSONExtract, 485 this=this, 486 expression=path, 487 ), 488 TokenType.DARROW: lambda self, this, path: self.expression( 489 exp.JSONExtractScalar, 490 this=this, 491 expression=path, 492 ), 493 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 494 
exp.JSONBExtract, 495 this=this, 496 expression=path, 497 ), 498 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 499 exp.JSONBExtractScalar, 500 this=this, 501 expression=path, 502 ), 503 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 504 exp.JSONBContains, 505 this=this, 506 expression=key, 507 ), 508 } 509 510 EXPRESSION_PARSERS = { 511 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 512 exp.Column: lambda self: self._parse_column(), 513 exp.Condition: lambda self: self._parse_conjunction(), 514 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 515 exp.Expression: lambda self: self._parse_statement(), 516 exp.From: lambda self: self._parse_from(), 517 exp.Group: lambda self: self._parse_group(), 518 exp.Having: lambda self: self._parse_having(), 519 exp.Identifier: lambda self: self._parse_id_var(), 520 exp.Join: lambda self: self._parse_join(), 521 exp.Lambda: lambda self: self._parse_lambda(), 522 exp.Lateral: lambda self: self._parse_lateral(), 523 exp.Limit: lambda self: self._parse_limit(), 524 exp.Offset: lambda self: self._parse_offset(), 525 exp.Order: lambda self: self._parse_order(), 526 exp.Ordered: lambda self: self._parse_ordered(), 527 exp.Properties: lambda self: self._parse_properties(), 528 exp.Qualify: lambda self: self._parse_qualify(), 529 exp.Returning: lambda self: self._parse_returning(), 530 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 531 exp.Table: lambda self: self._parse_table_parts(), 532 exp.TableAlias: lambda self: self._parse_table_alias(), 533 exp.Where: lambda self: self._parse_where(), 534 exp.Window: lambda self: self._parse_named_window(), 535 exp.With: lambda self: self._parse_with(), 536 "JOIN_TYPE": lambda self: self._parse_join_parts(), 537 } 538 539 STATEMENT_PARSERS = { 540 TokenType.ALTER: lambda self: self._parse_alter(), 541 TokenType.BEGIN: lambda self: self._parse_transaction(), 542 TokenType.CACHE: lambda 
self: self._parse_cache(), 543 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 544 TokenType.COMMENT: lambda self: self._parse_comment(), 545 TokenType.CREATE: lambda self: self._parse_create(), 546 TokenType.DELETE: lambda self: self._parse_delete(), 547 TokenType.DESC: lambda self: self._parse_describe(), 548 TokenType.DESCRIBE: lambda self: self._parse_describe(), 549 TokenType.DROP: lambda self: self._parse_drop(), 550 TokenType.INSERT: lambda self: self._parse_insert(), 551 TokenType.KILL: lambda self: self._parse_kill(), 552 TokenType.LOAD: lambda self: self._parse_load(), 553 TokenType.MERGE: lambda self: self._parse_merge(), 554 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 555 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 556 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 557 TokenType.SET: lambda self: self._parse_set(), 558 TokenType.UNCACHE: lambda self: self._parse_uncache(), 559 TokenType.UPDATE: lambda self: self._parse_update(), 560 TokenType.USE: lambda self: self.expression( 561 exp.Use, 562 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 563 and exp.var(self._prev.text), 564 this=self._parse_table(schema=False), 565 ), 566 } 567 568 UNARY_PARSERS = { 569 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 570 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 571 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 572 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 573 } 574 575 PRIMARY_PARSERS = { 576 TokenType.STRING: lambda self, token: self.expression( 577 exp.Literal, this=token.text, is_string=True 578 ), 579 TokenType.NUMBER: lambda self, token: self.expression( 580 exp.Literal, this=token.text, is_string=False 581 ), 582 TokenType.STAR: lambda self, _: self.expression( 583 exp.Star, 
**{"except": self._parse_except(), "replace": self._parse_replace()} 584 ), 585 TokenType.NULL: lambda self, _: self.expression(exp.Null), 586 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 587 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 588 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 589 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 590 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 591 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 592 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 593 exp.National, this=token.text 594 ), 595 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 596 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 597 exp.RawString, this=token.text 598 ), 599 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 600 } 601 602 PLACEHOLDER_PARSERS = { 603 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 604 TokenType.PARAMETER: lambda self: self._parse_parameter(), 605 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 606 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 607 else None, 608 } 609 610 RANGE_PARSERS = { 611 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 612 TokenType.GLOB: binary_range_parser(exp.Glob), 613 TokenType.ILIKE: binary_range_parser(exp.ILike), 614 TokenType.IN: lambda self, this: self._parse_in(this), 615 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 616 TokenType.IS: lambda self, this: self._parse_is(this), 617 TokenType.LIKE: binary_range_parser(exp.Like), 618 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 619 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 620 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 621 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 622 } 623 624 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 625 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 626 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 627 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 628 "CHARACTER SET": lambda self: self._parse_character_set(), 629 "CHECKSUM": lambda self: self._parse_checksum(), 630 "CLUSTER BY": lambda self: self._parse_cluster(), 631 "CLUSTERED": lambda self: self._parse_clustered_by(), 632 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 633 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 634 "COPY": lambda self: self._parse_copy_property(), 635 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 636 "DEFINER": lambda self: self._parse_definer(), 637 "DETERMINISTIC": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 639 ), 640 "DISTKEY": lambda self: self._parse_distkey(), 641 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 642 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 643 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 644 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 645 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 646 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 647 "FREESPACE": lambda self: self._parse_freespace(), 648 "HEAP": lambda self: self.expression(exp.HeapProperty), 649 "IMMUTABLE": lambda self: self.expression( 650 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 651 ), 652 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 653 "LANGUAGE": lambda self: 
self._parse_property_assignment(exp.LanguageProperty), 654 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 655 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 656 "LIKE": lambda self: self._parse_create_like(), 657 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 658 "LOCK": lambda self: self._parse_locking(), 659 "LOCKING": lambda self: self._parse_locking(), 660 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 661 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 662 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 663 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 664 "NO": lambda self: self._parse_no_property(), 665 "ON": lambda self: self._parse_on_property(), 666 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 667 "PARTITION BY": lambda self: self._parse_partitioned_by(), 668 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 670 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 671 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 672 "RETURNS": lambda self: self._parse_returns(), 673 "ROW": lambda self: self._parse_row(), 674 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 675 "SAMPLE": lambda self: self.expression( 676 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 677 ), 678 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 679 "SETTINGS": lambda self: self.expression( 680 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 681 ), 682 "SORTKEY": lambda self: self._parse_sortkey(), 683 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 684 "STABLE": lambda self: self.expression( 685 exp.StabilityProperty, this=exp.Literal.string("STABLE") 686 ), 687 "STORED": lambda self: 
self._parse_stored(), 688 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 689 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 690 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 691 "TO": lambda self: self._parse_to_table(), 692 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 693 "TTL": lambda self: self._parse_ttl(), 694 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 695 "VOLATILE": lambda self: self._parse_volatile_property(), 696 "WITH": lambda self: self._parse_with_property(), 697 } 698 699 CONSTRAINT_PARSERS = { 700 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 701 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 702 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 703 "CHARACTER SET": lambda self: self.expression( 704 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 705 ), 706 "CHECK": lambda self: self.expression( 707 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 708 ), 709 "COLLATE": lambda self: self.expression( 710 exp.CollateColumnConstraint, this=self._parse_var() 711 ), 712 "COMMENT": lambda self: self.expression( 713 exp.CommentColumnConstraint, this=self._parse_string() 714 ), 715 "COMPRESS": lambda self: self._parse_compress(), 716 "CLUSTERED": lambda self: self.expression( 717 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 718 ), 719 "NONCLUSTERED": lambda self: self.expression( 720 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 721 ), 722 "DEFAULT": lambda self: self.expression( 723 exp.DefaultColumnConstraint, this=self._parse_bitwise() 724 ), 725 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 726 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 727 "FORMAT": lambda self: self.expression( 728 
exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 729 ), 730 "GENERATED": lambda self: self._parse_generated_as_identity(), 731 "IDENTITY": lambda self: self._parse_auto_increment(), 732 "INLINE": lambda self: self._parse_inline(), 733 "LIKE": lambda self: self._parse_create_like(), 734 "NOT": lambda self: self._parse_not_constraint(), 735 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 736 "ON": lambda self: ( 737 self._match(TokenType.UPDATE) 738 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 739 ) 740 or self.expression(exp.OnProperty, this=self._parse_id_var()), 741 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 742 "PRIMARY KEY": lambda self: self._parse_primary_key(), 743 "REFERENCES": lambda self: self._parse_references(match=False), 744 "TITLE": lambda self: self.expression( 745 exp.TitleColumnConstraint, this=self._parse_var_or_string() 746 ), 747 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 748 "UNIQUE": lambda self: self._parse_unique(), 749 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 750 "WITH": lambda self: self.expression( 751 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 752 ), 753 } 754 755 ALTER_PARSERS = { 756 "ADD": lambda self: self._parse_alter_table_add(), 757 "ALTER": lambda self: self._parse_alter_table_alter(), 758 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 759 "DROP": lambda self: self._parse_alter_table_drop(), 760 "RENAME": lambda self: self._parse_alter_table_rename(), 761 } 762 763 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 764 765 NO_PAREN_FUNCTION_PARSERS = { 766 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 767 "CASE": lambda self: self._parse_case(), 768 "IF": lambda self: self._parse_if(), 769 
"NEXT": lambda self: self._parse_next_value_for(), 770 } 771 772 INVALID_FUNC_NAME_TOKENS = { 773 TokenType.IDENTIFIER, 774 TokenType.STRING, 775 } 776 777 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 778 779 FUNCTION_PARSERS = { 780 "ANY_VALUE": lambda self: self._parse_any_value(), 781 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 782 "CONCAT": lambda self: self._parse_concat(), 783 "CONCAT_WS": lambda self: self._parse_concat_ws(), 784 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 785 "DECODE": lambda self: self._parse_decode(), 786 "EXTRACT": lambda self: self._parse_extract(), 787 "JSON_OBJECT": lambda self: self._parse_json_object(), 788 "LOG": lambda self: self._parse_logarithm(), 789 "MATCH": lambda self: self._parse_match_against(), 790 "OPENJSON": lambda self: self._parse_open_json(), 791 "POSITION": lambda self: self._parse_position(), 792 "PREDICT": lambda self: self._parse_predict(), 793 "SAFE_CAST": lambda self: self._parse_cast(False), 794 "STRING_AGG": lambda self: self._parse_string_agg(), 795 "SUBSTRING": lambda self: self._parse_substring(), 796 "TRIM": lambda self: self._parse_trim(), 797 "TRY_CAST": lambda self: self._parse_cast(False), 798 "TRY_CONVERT": lambda self: self._parse_convert(False), 799 } 800 801 QUERY_MODIFIER_PARSERS = { 802 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 803 TokenType.WHERE: lambda self: ("where", self._parse_where()), 804 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 805 TokenType.HAVING: lambda self: ("having", self._parse_having()), 806 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 807 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 808 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 809 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 810 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 811 TokenType.OFFSET: lambda self: ("offset", 
self._parse_offset()), 812 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 813 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 814 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 815 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 816 TokenType.CLUSTER_BY: lambda self: ( 817 "cluster", 818 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 819 ), 820 TokenType.DISTRIBUTE_BY: lambda self: ( 821 "distribute", 822 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 823 ), 824 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 825 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 826 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 827 } 828 829 SET_PARSERS = { 830 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 831 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 832 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 833 "TRANSACTION": lambda self: self._parse_set_transaction(), 834 } 835 836 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 837 838 TYPE_LITERAL_PARSERS = { 839 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 840 } 841 842 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 843 844 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 845 846 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 847 848 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 849 TRANSACTION_CHARACTERISTICS = { 850 "ISOLATION LEVEL REPEATABLE READ", 851 "ISOLATION LEVEL READ COMMITTED", 852 "ISOLATION LEVEL READ UNCOMMITTED", 853 "ISOLATION LEVEL SERIALIZABLE", 854 "READ WRITE", 855 "READ ONLY", 856 } 857 858 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 859 860 CLONE_KEYWORDS = {"CLONE", 
"COPY"} 861 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 862 863 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS"} 864 865 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 866 867 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 868 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 869 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 870 871 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 872 873 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 874 875 DISTINCT_TOKENS = {TokenType.DISTINCT} 876 877 NULL_TOKENS = {TokenType.NULL} 878 879 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 880 881 STRICT_CAST = True 882 883 # A NULL arg in CONCAT yields NULL by default 884 CONCAT_NULL_OUTPUTS_STRING = False 885 886 PREFIXED_PIVOT_COLUMNS = False 887 IDENTIFY_PIVOT_STRINGS = False 888 889 LOG_BASE_FIRST = True 890 LOG_DEFAULTS_TO_LN = False 891 892 # Whether or not ADD is present for each column added by ALTER TABLE 893 ALTER_TABLE_ADD_COLUMN_KEYWORD = True 894 895 # Whether or not the table sample clause expects CSV syntax 896 TABLESAMPLE_CSV = False 897 898 # Whether or not the SET command needs a delimiter (e.g. 
"=") for assignments 899 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 900 901 # Whether the TRIM function expects the characters to trim as its first argument 902 TRIM_PATTERN_FIRST = False 903 904 __slots__ = ( 905 "error_level", 906 "error_message_context", 907 "max_errors", 908 "sql", 909 "errors", 910 "_tokens", 911 "_index", 912 "_curr", 913 "_next", 914 "_prev", 915 "_prev_comments", 916 "_tokenizer", 917 ) 918 919 # Autofilled 920 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 921 INDEX_OFFSET: int = 0 922 UNNEST_COLUMN_ONLY: bool = False 923 ALIAS_POST_TABLESAMPLE: bool = False 924 STRICT_STRING_CONCAT = False 925 SUPPORTS_USER_DEFINED_TYPES = True 926 NORMALIZE_FUNCTIONS = "upper" 927 NULL_ORDERING: str = "nulls_are_small" 928 SHOW_TRIE: t.Dict = {} 929 SET_TRIE: t.Dict = {} 930 FORMAT_MAPPING: t.Dict[str, str] = {} 931 FORMAT_TRIE: t.Dict = {} 932 TIME_MAPPING: t.Dict[str, str] = {} 933 TIME_TRIE: t.Dict = {} 934 935 def __init__( 936 self, 937 error_level: t.Optional[ErrorLevel] = None, 938 error_message_context: int = 100, 939 max_errors: int = 3, 940 ): 941 self.error_level = error_level or ErrorLevel.IMMEDIATE 942 self.error_message_context = error_message_context 943 self.max_errors = max_errors 944 self._tokenizer = self.TOKENIZER_CLASS() 945 self.reset() 946 947 def reset(self): 948 self.sql = "" 949 self.errors = [] 950 self._tokens = [] 951 self._index = 0 952 self._curr = None 953 self._next = None 954 self._prev = None 955 self._prev_comments = None 956 957 def parse( 958 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 959 ) -> t.List[t.Optional[exp.Expression]]: 960 """ 961 Parses a list of tokens and returns a list of syntax trees, one tree 962 per parsed SQL statement. 963 964 Args: 965 raw_tokens: The list of tokens. 966 sql: The original SQL string, used to produce helpful debug messages. 967 968 Returns: 969 The list of the produced syntax trees. 
970 """ 971 return self._parse( 972 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 973 ) 974 975 def parse_into( 976 self, 977 expression_types: exp.IntoType, 978 raw_tokens: t.List[Token], 979 sql: t.Optional[str] = None, 980 ) -> t.List[t.Optional[exp.Expression]]: 981 """ 982 Parses a list of tokens into a given Expression type. If a collection of Expression 983 types is given instead, this method will try to parse the token list into each one 984 of them, stopping at the first for which the parsing succeeds. 985 986 Args: 987 expression_types: The expression type(s) to try and parse the token list into. 988 raw_tokens: The list of tokens. 989 sql: The original SQL string, used to produce helpful debug messages. 990 991 Returns: 992 The target Expression. 993 """ 994 errors = [] 995 for expression_type in ensure_list(expression_types): 996 parser = self.EXPRESSION_PARSERS.get(expression_type) 997 if not parser: 998 raise TypeError(f"No parser registered for {expression_type}") 999 1000 try: 1001 return self._parse(parser, raw_tokens, sql) 1002 except ParseError as e: 1003 e.errors[0]["into_expression"] = expression_type 1004 errors.append(e) 1005 1006 raise ParseError( 1007 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1008 errors=merge_errors(errors), 1009 ) from errors[-1] 1010 1011 def _parse( 1012 self, 1013 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1014 raw_tokens: t.List[Token], 1015 sql: t.Optional[str] = None, 1016 ) -> t.List[t.Optional[exp.Expression]]: 1017 self.reset() 1018 self.sql = sql or "" 1019 1020 total = len(raw_tokens) 1021 chunks: t.List[t.List[Token]] = [[]] 1022 1023 for i, token in enumerate(raw_tokens): 1024 if token.token_type == TokenType.SEMICOLON: 1025 if i < total - 1: 1026 chunks.append([]) 1027 else: 1028 chunks[-1].append(token) 1029 1030 expressions = [] 1031 1032 for tokens in chunks: 1033 self._index = -1 1034 self._tokens = tokens 1035 self._advance() 
1036 1037 expressions.append(parse_method(self)) 1038 1039 if self._index < len(self._tokens): 1040 self.raise_error("Invalid expression / Unexpected token") 1041 1042 self.check_errors() 1043 1044 return expressions 1045 1046 def check_errors(self) -> None: 1047 """Logs or raises any found errors, depending on the chosen error level setting.""" 1048 if self.error_level == ErrorLevel.WARN: 1049 for error in self.errors: 1050 logger.error(str(error)) 1051 elif self.error_level == ErrorLevel.RAISE and self.errors: 1052 raise ParseError( 1053 concat_messages(self.errors, self.max_errors), 1054 errors=merge_errors(self.errors), 1055 ) 1056 1057 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1058 """ 1059 Appends an error in the list of recorded errors or raises it, depending on the chosen 1060 error level setting. 1061 """ 1062 token = token or self._curr or self._prev or Token.string("") 1063 start = token.start 1064 end = token.end + 1 1065 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1066 highlight = self.sql[start:end] 1067 end_context = self.sql[end : end + self.error_message_context] 1068 1069 error = ParseError.new( 1070 f"{message}. Line {token.line}, Col: {token.col}.\n" 1071 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1072 description=message, 1073 line=token.line, 1074 col=token.col, 1075 start_context=start_context, 1076 highlight=highlight, 1077 end_context=end_context, 1078 ) 1079 1080 if self.error_level == ErrorLevel.IMMEDIATE: 1081 raise error 1082 1083 self.errors.append(error) 1084 1085 def expression( 1086 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1087 ) -> E: 1088 """ 1089 Creates a new, validated Expression. 1090 1091 Args: 1092 exp_class: The expression class to instantiate. 1093 comments: An optional list of comments to attach to the expression. 1094 kwargs: The arguments to set for the expression along with their respective values. 
1095 1096 Returns: 1097 The target expression. 1098 """ 1099 instance = exp_class(**kwargs) 1100 instance.add_comments(comments) if comments else self._add_comments(instance) 1101 return self.validate_expression(instance) 1102 1103 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1104 if expression and self._prev_comments: 1105 expression.add_comments(self._prev_comments) 1106 self._prev_comments = None 1107 1108 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1109 """ 1110 Validates an Expression, making sure that all its mandatory arguments are set. 1111 1112 Args: 1113 expression: The expression to validate. 1114 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1115 1116 Returns: 1117 The validated expression. 1118 """ 1119 if self.error_level != ErrorLevel.IGNORE: 1120 for error_message in expression.error_messages(args): 1121 self.raise_error(error_message) 1122 1123 return expression 1124 1125 def _find_sql(self, start: Token, end: Token) -> str: 1126 return self.sql[start.start : end.end + 1] 1127 1128 def _advance(self, times: int = 1) -> None: 1129 self._index += times 1130 self._curr = seq_get(self._tokens, self._index) 1131 self._next = seq_get(self._tokens, self._index + 1) 1132 1133 if self._index > 0: 1134 self._prev = self._tokens[self._index - 1] 1135 self._prev_comments = self._prev.comments 1136 else: 1137 self._prev = None 1138 self._prev_comments = None 1139 1140 def _retreat(self, index: int) -> None: 1141 if index != self._index: 1142 self._advance(index - self._index) 1143 1144 def _parse_command(self) -> exp.Command: 1145 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1146 1147 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1148 start = self._prev 1149 exists = self._parse_exists() if allow_exists else None 1150 1151 self._match(TokenType.ON) 1152 1153 kind = 
self._match_set(self.CREATABLES) and self._prev 1154 if not kind: 1155 return self._parse_as_command(start) 1156 1157 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1158 this = self._parse_user_defined_function(kind=kind.token_type) 1159 elif kind.token_type == TokenType.TABLE: 1160 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1161 elif kind.token_type == TokenType.COLUMN: 1162 this = self._parse_column() 1163 else: 1164 this = self._parse_id_var() 1165 1166 self._match(TokenType.IS) 1167 1168 return self.expression( 1169 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1170 ) 1171 1172 def _parse_to_table( 1173 self, 1174 ) -> exp.ToTableProperty: 1175 table = self._parse_table_parts(schema=True) 1176 return self.expression(exp.ToTableProperty, this=table) 1177 1178 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1179 def _parse_ttl(self) -> exp.Expression: 1180 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1181 this = self._parse_bitwise() 1182 1183 if self._match_text_seq("DELETE"): 1184 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1185 if self._match_text_seq("RECOMPRESS"): 1186 return self.expression( 1187 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1188 ) 1189 if self._match_text_seq("TO", "DISK"): 1190 return self.expression( 1191 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1192 ) 1193 if self._match_text_seq("TO", "VOLUME"): 1194 return self.expression( 1195 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1196 ) 1197 1198 return this 1199 1200 expressions = self._parse_csv(_parse_ttl_action) 1201 where = self._parse_where() 1202 group = self._parse_group() 1203 1204 aggregates = None 1205 if group and self._match(TokenType.SET): 1206 aggregates = self._parse_csv(self._parse_set_item) 1207 1208 return self.expression( 1209 
exp.MergeTreeTTL, 1210 expressions=expressions, 1211 where=where, 1212 group=group, 1213 aggregates=aggregates, 1214 ) 1215 1216 def _parse_statement(self) -> t.Optional[exp.Expression]: 1217 if self._curr is None: 1218 return None 1219 1220 if self._match_set(self.STATEMENT_PARSERS): 1221 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1222 1223 if self._match_set(Tokenizer.COMMANDS): 1224 return self._parse_command() 1225 1226 expression = self._parse_expression() 1227 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1228 return self._parse_query_modifiers(expression) 1229 1230 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1231 start = self._prev 1232 temporary = self._match(TokenType.TEMPORARY) 1233 materialized = self._match_text_seq("MATERIALIZED") 1234 1235 kind = self._match_set(self.CREATABLES) and self._prev.text 1236 if not kind: 1237 return self._parse_as_command(start) 1238 1239 return self.expression( 1240 exp.Drop, 1241 comments=start.comments, 1242 exists=exists or self._parse_exists(), 1243 this=self._parse_table(schema=True), 1244 kind=kind, 1245 temporary=temporary, 1246 materialized=materialized, 1247 cascade=self._match_text_seq("CASCADE"), 1248 constraints=self._match_text_seq("CONSTRAINTS"), 1249 purge=self._match_text_seq("PURGE"), 1250 ) 1251 1252 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1253 return ( 1254 self._match_text_seq("IF") 1255 and (not not_ or self._match(TokenType.NOT)) 1256 and self._match(TokenType.EXISTS) 1257 ) 1258 1259 def _parse_create(self) -> exp.Create | exp.Command: 1260 # Note: this can't be None because we've matched a statement parser 1261 start = self._prev 1262 comments = self._prev_comments 1263 1264 replace = start.text.upper() == "REPLACE" or self._match_pair( 1265 TokenType.OR, TokenType.REPLACE 1266 ) 1267 unique = self._match(TokenType.UNIQUE) 1268 1269 if self._match_pair(TokenType.TABLE, 
TokenType.FUNCTION, advance=False): 1270 self._advance() 1271 1272 properties = None 1273 create_token = self._match_set(self.CREATABLES) and self._prev 1274 1275 if not create_token: 1276 # exp.Properties.Location.POST_CREATE 1277 properties = self._parse_properties() 1278 create_token = self._match_set(self.CREATABLES) and self._prev 1279 1280 if not properties or not create_token: 1281 return self._parse_as_command(start) 1282 1283 exists = self._parse_exists(not_=True) 1284 this = None 1285 expression: t.Optional[exp.Expression] = None 1286 indexes = None 1287 no_schema_binding = None 1288 begin = None 1289 end = None 1290 clone = None 1291 1292 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1293 nonlocal properties 1294 if properties and temp_props: 1295 properties.expressions.extend(temp_props.expressions) 1296 elif temp_props: 1297 properties = temp_props 1298 1299 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1300 this = self._parse_user_defined_function(kind=create_token.token_type) 1301 1302 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1303 extend_props(self._parse_properties()) 1304 1305 self._match(TokenType.ALIAS) 1306 1307 if self._match(TokenType.COMMAND): 1308 expression = self._parse_as_command(self._prev) 1309 else: 1310 begin = self._match(TokenType.BEGIN) 1311 return_ = self._match_text_seq("RETURN") 1312 1313 if self._match(TokenType.STRING, advance=False): 1314 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1315 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1316 expression = self._parse_string() 1317 extend_props(self._parse_properties()) 1318 else: 1319 expression = self._parse_statement() 1320 1321 end = self._match_text_seq("END") 1322 1323 if return_: 1324 expression = self.expression(exp.Return, this=expression) 1325 elif create_token.token_type == 
TokenType.INDEX: 1326 this = self._parse_index(index=self._parse_id_var()) 1327 elif create_token.token_type in self.DB_CREATABLES: 1328 table_parts = self._parse_table_parts(schema=True) 1329 1330 # exp.Properties.Location.POST_NAME 1331 self._match(TokenType.COMMA) 1332 extend_props(self._parse_properties(before=True)) 1333 1334 this = self._parse_schema(this=table_parts) 1335 1336 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1337 extend_props(self._parse_properties()) 1338 1339 self._match(TokenType.ALIAS) 1340 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1341 # exp.Properties.Location.POST_ALIAS 1342 extend_props(self._parse_properties()) 1343 1344 expression = self._parse_ddl_select() 1345 1346 if create_token.token_type == TokenType.TABLE: 1347 # exp.Properties.Location.POST_EXPRESSION 1348 extend_props(self._parse_properties()) 1349 1350 indexes = [] 1351 while True: 1352 index = self._parse_index() 1353 1354 # exp.Properties.Location.POST_INDEX 1355 extend_props(self._parse_properties()) 1356 1357 if not index: 1358 break 1359 else: 1360 self._match(TokenType.COMMA) 1361 indexes.append(index) 1362 elif create_token.token_type == TokenType.VIEW: 1363 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1364 no_schema_binding = True 1365 1366 shallow = self._match_text_seq("SHALLOW") 1367 1368 if self._match_texts(self.CLONE_KEYWORDS): 1369 copy = self._prev.text.lower() == "copy" 1370 clone = self._parse_table(schema=True) 1371 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1372 clone_kind = ( 1373 self._match(TokenType.L_PAREN) 1374 and self._match_texts(self.CLONE_KINDS) 1375 and self._prev.text.upper() 1376 ) 1377 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1378 self._match(TokenType.R_PAREN) 1379 clone = self.expression( 1380 exp.Clone, 1381 this=clone, 1382 when=when, 1383 kind=clone_kind, 1384 shallow=shallow, 1385 expression=clone_expression, 1386 copy=copy, 1387 ) 
1388 1389 return self.expression( 1390 exp.Create, 1391 comments=comments, 1392 this=this, 1393 kind=create_token.text, 1394 replace=replace, 1395 unique=unique, 1396 expression=expression, 1397 exists=exists, 1398 properties=properties, 1399 indexes=indexes, 1400 no_schema_binding=no_schema_binding, 1401 begin=begin, 1402 end=end, 1403 clone=clone, 1404 ) 1405 1406 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1407 # only used for teradata currently 1408 self._match(TokenType.COMMA) 1409 1410 kwargs = { 1411 "no": self._match_text_seq("NO"), 1412 "dual": self._match_text_seq("DUAL"), 1413 "before": self._match_text_seq("BEFORE"), 1414 "default": self._match_text_seq("DEFAULT"), 1415 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1416 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1417 "after": self._match_text_seq("AFTER"), 1418 "minimum": self._match_texts(("MIN", "MINIMUM")), 1419 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1420 } 1421 1422 if self._match_texts(self.PROPERTY_PARSERS): 1423 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1424 try: 1425 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1426 except TypeError: 1427 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1428 1429 return None 1430 1431 def _parse_property(self) -> t.Optional[exp.Expression]: 1432 if self._match_texts(self.PROPERTY_PARSERS): 1433 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1434 1435 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1436 return self._parse_character_set(default=True) 1437 1438 if self._match_text_seq("COMPOUND", "SORTKEY"): 1439 return self._parse_sortkey(compound=True) 1440 1441 if self._match_text_seq("SQL", "SECURITY"): 1442 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1443 1444 index = self._index 1445 key = self._parse_column() 1446 1447 if not self._match(TokenType.EQ): 1448 self._retreat(index) 1449 
return None 1450 1451 return self.expression( 1452 exp.Property, 1453 this=key.to_dot() if isinstance(key, exp.Column) else key, 1454 value=self._parse_column() or self._parse_var(any_token=True), 1455 ) 1456 1457 def _parse_stored(self) -> exp.FileFormatProperty: 1458 self._match(TokenType.ALIAS) 1459 1460 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1461 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1462 1463 return self.expression( 1464 exp.FileFormatProperty, 1465 this=self.expression( 1466 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1467 ) 1468 if input_format or output_format 1469 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1470 ) 1471 1472 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1473 self._match(TokenType.EQ) 1474 self._match(TokenType.ALIAS) 1475 return self.expression(exp_class, this=self._parse_field()) 1476 1477 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1478 properties = [] 1479 while True: 1480 if before: 1481 prop = self._parse_property_before() 1482 else: 1483 prop = self._parse_property() 1484 1485 if not prop: 1486 break 1487 for p in ensure_list(prop): 1488 properties.append(p) 1489 1490 if properties: 1491 return self.expression(exp.Properties, expressions=properties) 1492 1493 return None 1494 1495 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1496 return self.expression( 1497 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1498 ) 1499 1500 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1501 if self._index >= 2: 1502 pre_volatile_token = self._tokens[self._index - 2] 1503 else: 1504 pre_volatile_token = None 1505 1506 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1507 return exp.VolatileProperty() 
1508 1509 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1510 1511 def _parse_with_property( 1512 self, 1513 ) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1514 if self._match(TokenType.L_PAREN, advance=False): 1515 return self._parse_wrapped_csv(self._parse_property) 1516 1517 if self._match_text_seq("JOURNAL"): 1518 return self._parse_withjournaltable() 1519 1520 if self._match_text_seq("DATA"): 1521 return self._parse_withdata(no=False) 1522 elif self._match_text_seq("NO", "DATA"): 1523 return self._parse_withdata(no=True) 1524 1525 if not self._next: 1526 return None 1527 1528 return self._parse_withisolatedloading() 1529 1530 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1531 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1532 self._match(TokenType.EQ) 1533 1534 user = self._parse_id_var() 1535 self._match(TokenType.PARAMETER) 1536 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1537 1538 if not user or not host: 1539 return None 1540 1541 return exp.DefinerProperty(this=f"{user}@{host}") 1542 1543 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1544 self._match(TokenType.TABLE) 1545 self._match(TokenType.EQ) 1546 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1547 1548 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1549 return self.expression(exp.LogProperty, no=no) 1550 1551 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1552 return self.expression(exp.JournalProperty, **kwargs) 1553 1554 def _parse_checksum(self) -> exp.ChecksumProperty: 1555 self._match(TokenType.EQ) 1556 1557 on = None 1558 if self._match(TokenType.ON): 1559 on = True 1560 elif self._match_text_seq("OFF"): 1561 on = False 1562 1563 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1564 1565 def _parse_cluster(self) -> exp.Cluster: 1566 return 
self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1567 1568 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1569 self._match_text_seq("BY") 1570 1571 self._match_l_paren() 1572 expressions = self._parse_csv(self._parse_column) 1573 self._match_r_paren() 1574 1575 if self._match_text_seq("SORTED", "BY"): 1576 self._match_l_paren() 1577 sorted_by = self._parse_csv(self._parse_ordered) 1578 self._match_r_paren() 1579 else: 1580 sorted_by = None 1581 1582 self._match(TokenType.INTO) 1583 buckets = self._parse_number() 1584 self._match_text_seq("BUCKETS") 1585 1586 return self.expression( 1587 exp.ClusteredByProperty, 1588 expressions=expressions, 1589 sorted_by=sorted_by, 1590 buckets=buckets, 1591 ) 1592 1593 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1594 if not self._match_text_seq("GRANTS"): 1595 self._retreat(self._index - 1) 1596 return None 1597 1598 return self.expression(exp.CopyGrantsProperty) 1599 1600 def _parse_freespace(self) -> exp.FreespaceProperty: 1601 self._match(TokenType.EQ) 1602 return self.expression( 1603 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1604 ) 1605 1606 def _parse_mergeblockratio( 1607 self, no: bool = False, default: bool = False 1608 ) -> exp.MergeBlockRatioProperty: 1609 if self._match(TokenType.EQ): 1610 return self.expression( 1611 exp.MergeBlockRatioProperty, 1612 this=self._parse_number(), 1613 percent=self._match(TokenType.PERCENT), 1614 ) 1615 1616 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1617 1618 def _parse_datablocksize( 1619 self, 1620 default: t.Optional[bool] = None, 1621 minimum: t.Optional[bool] = None, 1622 maximum: t.Optional[bool] = None, 1623 ) -> exp.DataBlocksizeProperty: 1624 self._match(TokenType.EQ) 1625 size = self._parse_number() 1626 1627 units = None 1628 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1629 units = self._prev.text 1630 1631 return 
self.expression( 1632 exp.DataBlocksizeProperty, 1633 size=size, 1634 units=units, 1635 default=default, 1636 minimum=minimum, 1637 maximum=maximum, 1638 ) 1639 1640 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 1641 self._match(TokenType.EQ) 1642 always = self._match_text_seq("ALWAYS") 1643 manual = self._match_text_seq("MANUAL") 1644 never = self._match_text_seq("NEVER") 1645 default = self._match_text_seq("DEFAULT") 1646 1647 autotemp = None 1648 if self._match_text_seq("AUTOTEMP"): 1649 autotemp = self._parse_schema() 1650 1651 return self.expression( 1652 exp.BlockCompressionProperty, 1653 always=always, 1654 manual=manual, 1655 never=never, 1656 default=default, 1657 autotemp=autotemp, 1658 ) 1659 1660 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1661 no = self._match_text_seq("NO") 1662 concurrent = self._match_text_seq("CONCURRENT") 1663 self._match_text_seq("ISOLATED", "LOADING") 1664 for_all = self._match_text_seq("FOR", "ALL") 1665 for_insert = self._match_text_seq("FOR", "INSERT") 1666 for_none = self._match_text_seq("FOR", "NONE") 1667 return self.expression( 1668 exp.IsolatedLoadingProperty, 1669 no=no, 1670 concurrent=concurrent, 1671 for_all=for_all, 1672 for_insert=for_insert, 1673 for_none=for_none, 1674 ) 1675 1676 def _parse_locking(self) -> exp.LockingProperty: 1677 if self._match(TokenType.TABLE): 1678 kind = "TABLE" 1679 elif self._match(TokenType.VIEW): 1680 kind = "VIEW" 1681 elif self._match(TokenType.ROW): 1682 kind = "ROW" 1683 elif self._match_text_seq("DATABASE"): 1684 kind = "DATABASE" 1685 else: 1686 kind = None 1687 1688 if kind in ("DATABASE", "TABLE", "VIEW"): 1689 this = self._parse_table_parts() 1690 else: 1691 this = None 1692 1693 if self._match(TokenType.FOR): 1694 for_or_in = "FOR" 1695 elif self._match(TokenType.IN): 1696 for_or_in = "IN" 1697 else: 1698 for_or_in = None 1699 1700 if self._match_text_seq("ACCESS"): 1701 lock_type = "ACCESS" 1702 elif self._match_texts(("EXCL", 
"EXCLUSIVE")): 1703 lock_type = "EXCLUSIVE" 1704 elif self._match_text_seq("SHARE"): 1705 lock_type = "SHARE" 1706 elif self._match_text_seq("READ"): 1707 lock_type = "READ" 1708 elif self._match_text_seq("WRITE"): 1709 lock_type = "WRITE" 1710 elif self._match_text_seq("CHECKSUM"): 1711 lock_type = "CHECKSUM" 1712 else: 1713 lock_type = None 1714 1715 override = self._match_text_seq("OVERRIDE") 1716 1717 return self.expression( 1718 exp.LockingProperty, 1719 this=this, 1720 kind=kind, 1721 for_or_in=for_or_in, 1722 lock_type=lock_type, 1723 override=override, 1724 ) 1725 1726 def _parse_partition_by(self) -> t.List[exp.Expression]: 1727 if self._match(TokenType.PARTITION_BY): 1728 return self._parse_csv(self._parse_conjunction) 1729 return [] 1730 1731 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1732 self._match(TokenType.EQ) 1733 return self.expression( 1734 exp.PartitionedByProperty, 1735 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1736 ) 1737 1738 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1739 if self._match_text_seq("AND", "STATISTICS"): 1740 statistics = True 1741 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1742 statistics = False 1743 else: 1744 statistics = None 1745 1746 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1747 1748 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1749 if self._match_text_seq("PRIMARY", "INDEX"): 1750 return exp.NoPrimaryIndexProperty() 1751 return None 1752 1753 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1754 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1755 return exp.OnCommitProperty() 1756 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1757 return exp.OnCommitProperty(delete=True) 1758 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1759 1760 def _parse_distkey(self) -> exp.DistKeyProperty: 1761 return 
self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1762 1763 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1764 table = self._parse_table(schema=True) 1765 1766 options = [] 1767 while self._match_texts(("INCLUDING", "EXCLUDING")): 1768 this = self._prev.text.upper() 1769 1770 id_var = self._parse_id_var() 1771 if not id_var: 1772 return None 1773 1774 options.append( 1775 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1776 ) 1777 1778 return self.expression(exp.LikeProperty, this=table, expressions=options) 1779 1780 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1781 return self.expression( 1782 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1783 ) 1784 1785 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1786 self._match(TokenType.EQ) 1787 return self.expression( 1788 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1789 ) 1790 1791 def _parse_returns(self) -> exp.ReturnsProperty: 1792 value: t.Optional[exp.Expression] 1793 is_table = self._match(TokenType.TABLE) 1794 1795 if is_table: 1796 if self._match(TokenType.LT): 1797 value = self.expression( 1798 exp.Schema, 1799 this="TABLE", 1800 expressions=self._parse_csv(self._parse_struct_types), 1801 ) 1802 if not self._match(TokenType.GT): 1803 self.raise_error("Expecting >") 1804 else: 1805 value = self._parse_schema(exp.var("TABLE")) 1806 else: 1807 value = self._parse_types() 1808 1809 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1810 1811 def _parse_describe(self) -> exp.Describe: 1812 kind = self._match_set(self.CREATABLES) and self._prev.text 1813 this = self._parse_table(schema=True) 1814 properties = self._parse_properties() 1815 expressions = properties.expressions if properties else None 1816 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1817 
def _parse_insert(self) -> exp.Insert:
    """
    Builds an Insert from the tokens at the cursor.

    The target is either a DIRECTORY (with an optional row format) or a table, optionally
    introduced by `OR <alternative>`, INTO and TABLE. A RETURNING clause is accepted both
    right after the target and at the very end of the statement.
    """
    comments = ensure_list(self._prev_comments)
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    this: t.Optional[exp.Expression]

    if not self._match_text_seq("DIRECTORY"):
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        # Also keep the comments attached to the token just before the target
        comments.extend(ensure_list(self._prev_comments))
        self._match(TokenType.TABLE)
        this = self._parse_table(schema=True)
    else:
        directory = self._parse_var_or_string()
        row_format = self._parse_row_format(match_row=True)
        this = self.expression(
            exp.Directory, this=directory, local=local, row_format=row_format
        )

    early_returning = self._parse_returning()

    # The remaining clauses are consumed strictly in this order
    by_name = self._match_text_seq("BY", "NAME")
    exists = self._parse_exists()
    partition = self._parse_partition()
    where = self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_conjunction()
    source = self._parse_ddl_select()
    conflict = self._parse_on_conflict()
    returning = early_returning or self._parse_returning()

    return self.expression(
        exp.Insert,
        comments=comments,
        this=this,
        by_name=by_name,
        exists=exists,
        partition=partition,
        where=where,
        expression=source,
        conflict=conflict,
        returning=returning,
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
    )

def _parse_kill(self) -> exp.Kill:
    """Parses `[CONNECTION | QUERY] <primary>` into a Kill."""
    kind = None
    if self._match_texts(("CONNECTION", "QUERY")):
        kind = exp.var(self._prev.text)

    target = self._parse_primary()
    return self.expression(exp.Kill, this=target, kind=kind)

def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
    """
    Parses an `ON CONFLICT ...` or `ON DUPLICATE KEY ...` clause.

    Returns:
        The OnConflict node, or None if neither introducer is present.
    """
    conflict = self._match_text_seq("ON", "CONFLICT")
    duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not (conflict or duplicate):
        return None

    # ON CONFLICT may name either a constraint or a list of key values
    key = None
    constraint = None
    if conflict:
        if self._match_text_seq("ON", "CONSTRAINT"):
            constraint = self._parse_id_var()
        else:
            key = self._parse_csv(self._parse_value)

    self._match_text_seq("DO")

    nothing = None
    expressions = None
    if self._match_text_seq("NOTHING"):
        nothing = True
    else:
        self._match(TokenType.UPDATE)
        self._match(TokenType.SET)
        expressions = self._parse_csv(self._parse_equality)

    return self.expression(
        exp.OnConflict,
        duplicate=duplicate,
        expressions=expressions,
        nothing=nothing,
        key=key,
        constraint=constraint,
    )

def _parse_returning(self) -> t.Optional[exp.Returning]:
    """Parses `RETURNING <expr, ...> [INTO <table part>]`; None if RETURNING is absent."""
    if self._match(TokenType.RETURNING):
        returned = self._parse_csv(self._parse_expression)
        into = self._match(TokenType.INTO) and self._parse_table_part()
        return self.expression(exp.Returning, expressions=returned, into=into)

    return None

def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parses `FORMAT <row format>`; None if FORMAT is absent."""
    if self._match(TokenType.FORMAT):
        return self._parse_row_format()

    return None

def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """
    Parses a SERDE or DELIMITED row format.

    Args:
        match_row: If true, `ROW FORMAT` must come first, otherwise None is returned.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde = self._parse_string()

        serde_properties = None
        if self._match(TokenType.SERDE_PROPERTIES):
            wrapped = self._parse_wrapped_csv(self._parse_property)
            serde_properties = self.expression(exp.SerdeProperties, expressions=wrapped)

        return self.expression(
            exp.RowFormatSerdeProperty, this=serde, serde_properties=serde_properties
        )

    self._match_text_seq("DELIMITED")

    # Each optional clause is a fixed keyword sequence followed by a string, tried in order
    delimited_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    kwargs = {}
    for arg_name, keywords in delimited_clauses:
        if self._match_text_seq(*keywords):
            kwargs[arg_name] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parses `LOAD DATA ...` into a LoadData; any other LOAD is parsed as a command."""
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")
    inpath = self._parse_string()
    overwrite = self._match(TokenType.OVERWRITE)
    self._match_pair(TokenType.INTO, TokenType.TABLE)

    table = self._parse_table(schema=True)
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    return self.expression(
        exp.LoadData,
        this=table,
        local=local,
        overwrite=overwrite,
        inpath=inpath,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )

def _parse_delete(self) -> exp.Delete:
    """
    Builds a Delete from the tokens at the cursor.

    A RETURNING clause is accepted both before FROM and after WHERE.
    """
    comments = self._prev_comments

    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    tables = None
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    early_returning = self._parse_returning()

    target = self._match(TokenType.FROM) and self._parse_table(joins=True)
    using = self._match(TokenType.USING) and self._parse_table(joins=True)
    where = self._parse_where()
    returning = early_returning or self._parse_returning()
    limit = self._parse_limit()

    return self.expression(
        exp.Delete,
        comments=comments,
        tables=tables,
        this=target,
        using=using,
        where=where,
        returning=returning,
        limit=limit,
    )

def _parse_update(self) -> exp.Update:
    """Parse the remainder of an UPDATE statement into an ``exp.Update`` node.

    Clauses are consumed in this order: the target table (with joins), the ``SET``
    assignments, an early ``RETURNING``, then ``FROM``, ``WHERE``, a late
    ``RETURNING`` (only attempted when the early one was absent), ``ORDER BY``
    and ``LIMIT``.
    """
    leading_comments = self._prev_comments
    target = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
    assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
    early_returning = self._parse_returning()

    # "from" is a Python keyword, so the node arguments are collected in a dict and
    # unpacked rather than written as keyword arguments. Dict display entries are
    # evaluated top to bottom, which fixes the order in which clauses are consumed.
    update_args: t.Dict[str, t.Any] = {
        "this": target,
        "expressions": assignments,
        "from": self._parse_from(joins=True),
        "where": self._parse_where(),
        "returning": early_returning or self._parse_returning(),
        "order": self._parse_order(),
        "limit": self._parse_limit(),
    }

    return self.expression(exp.Update, comments=leading_comments, **update_args)
def _parse_select(
    self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
) -> t.Optional[exp.Expression]:
    """Parse a query expression: a CTE-prefixed statement, SELECT, subquery, VALUES or bare FROM.

    Args:
        nested: When true, a leading ``(`` is accepted and its content is parsed
            recursively as a nested select.
        table: When true, a leading ``(`` is accepted and its content is parsed
            with ``_parse_table``.
        parse_subquery_alias: Forwarded to ``_parse_subquery`` as ``parse_alias``
            for the parenthesized form.

    Returns:
        The parsed expression with any trailing set operations applied, or the
        result of ``_parse_set_operations(None)`` when none of the supported
        forms is found at the current position.
    """
    cte = self._parse_with()

    if cte:
        # A WITH clause must be followed by a statement that can carry it.
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            # Only reached when raise_error does not actually raise.
            return cte

        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte

        return this

    # duckdb supports leading with FROM x
    from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

    if self._match(TokenType.SELECT):
        comments = self._prev_comments

        hint = self._parse_hint()
        all_ = self._match(TokenType.ALL)
        distinct = self._match_set(self.DISTINCT_TOKENS)

        # `SELECT AS STRUCT` / `SELECT AS VALUE`: kind is the matched keyword text,
        # otherwise the falsy result of the failed match.
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text
        )

        if distinct:
            # Replace the match flag with a Distinct node, capturing an optional ON (...) list.
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # A TOP-style limit is looked for before the projection list.
        limit = self._parse_limit(top=True)
        projections = self._parse_projections()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # Only look for a trailing FROM when no leading FROM was consumed above.
        if not from_:
            from_ = self._parse_from()

        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        if self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            # `(FROM x)` is expanded to `SELECT * FROM x`.
            this = exp.select("*").from_(
                t.cast(exp.From, self._parse_from(skip_from_token=True))
            )
        else:
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))

        self._match_r_paren()

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES):
        this = self.expression(
            exp.Values,
            expressions=self._parse_csv(self._parse_value),
            alias=self._parse_table_alias(),
        )
    elif from_:
        # A leading FROM with no SELECT is treated as `SELECT * FROM ...`.
        this = exp.select("*").from_(from_.this, copy=False)
    else:
        this = None

    return self._parse_set_operations(this)
def _parse_query_modifiers(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """Attach joins, laterals and trailing query modifiers to ``this``.

    Args:
        this: The expression to decorate. Anything that is not an instance of
            ``self.MODIFIABLES`` is returned untouched.

    Returns:
        The same ``this`` object, mutated in place when it is modifiable.
    """
    if not isinstance(this, self.MODIFIABLES):
        return this

    for join in iter(self._parse_join, None):
        this.append("joins", join)

    for lateral in iter(self._parse_lateral, None):
        this.append("laterals", lateral)

    # Keep dispatching on the current token for as long as it names a registered
    # modifier parser and that parser actually yields an expression.
    while self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
        modifier_parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
        arg_name, parsed = modifier_parser(self)

        if not parsed:
            break

        this.set(arg_name, parsed)

        if arg_name == "limit":
            # An offset carried inside the limit node is hoisted into its own
            # "offset" argument on the query.
            embedded_offset = parsed.args.pop("offset", None)
            if embedded_offset:
                this.set("offset", exp.Offset(expression=embedded_offset))

    return this
def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
    """Parse a ``MATCH_RECOGNIZE (...)`` clause into an ``exp.MatchRecognize`` node.

    The sub-clauses are looked for in this fixed order, each one optional:
    partition by, order by, ``MEASURES``, the rows-per-match option,
    ``AFTER MATCH SKIP``, ``PATTERN (...)`` and ``DEFINE``, followed by the closing
    parenthesis and a table alias.

    Returns:
        The parsed node, or ``None`` when ``TokenType.MATCH_RECOGNIZE`` is not
        matched at the current position.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()
    measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

    # The rows-per-match option is stored as a single variable holding the
    # normalized keyword sequence.
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST/LAST is appended verbatim.
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # The pattern body is not parsed: tokens are skipped while tracking
        # parenthesis depth, and the raw SQL between the first token and the one
        # preceding the balancing ")" is kept as a variable.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    # DEFINE is a comma-separated list of `<name> AS <condition>` entries.
    define = (
        self._parse_csv(
            lambda: self.expression(
                exp.Alias,
                alias=self._parse_id_var(any_token=True),
                this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
            )
        )
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize,
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
def _parse_join_parts(
    self,
) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
    """Consume the optional method, side and kind tokens that may prefix a join.

    Returns:
        A ``(method, side, kind)`` tuple. Each slot holds the token that was
        matched, or the falsy result of the failed match when that part is absent.
    """
    # The three parts are tried strictly in this order; each successful match
    # advances the parser, so ``self._prev`` is the token just consumed.
    method = self._match_set(self.JOIN_METHODS) and self._prev
    side = self._match_set(self.JOIN_SIDES) and self._prev
    kind = self._match_set(self.JOIN_KINDS) and self._prev
    return method, side, kind
def _parse_opclass(self) -> t.Optional[exp.Expression]:
    """Parse an expression optionally followed by an operator-class name.

    Returns:
        The bare expression when the next token text is one of
        ``self.OPCLASS_FOLLOW_KEYWORDS`` or when no variable follows; otherwise an
        ``exp.Opclass`` node wrapping the expression and the parsed name.
    """
    column = self._parse_conjunction()

    # A following keyword belongs to the surrounding clause, so it must not be
    # swallowed as an operator-class name.
    followed_by_keyword = self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False)
    operator_class = None if followed_by_keyword else self._parse_var(any_token=True)

    if not operator_class:
        return column

    return self.expression(exp.Opclass, this=column, expression=operator_class)
def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse table hints that may follow a table reference.

    Two forms are recognized: a single ``WITH (...)`` hint list, or a run of index
    hints introduced by any token in ``self.TABLE_INDEX_HINT_TOKENS``.

    Returns:
        The list of parsed hint nodes, or ``None`` when no hint was found.
    """
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        def _parse_hint_item() -> t.Optional[exp.Expression]:
            return self._parse_function() or self._parse_var(any_token=True)

        with_hint = self.expression(
            exp.WithTableHint, expressions=self._parse_csv(_parse_hint_item)
        )
        self._match_r_paren()
        return [with_hint]

    # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
    index_hints: t.List[exp.Expression] = []
    while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
        index_hint = exp.IndexTableHint(this=self._prev.text.upper())

        # The INDEX / KEY keyword is optional noise after the hint verb.
        self._match_texts({"INDEX", "KEY"})

        if self._match(TokenType.FOR):
            # The token after FOR names the hint's target; it is stored upper-cased.
            index_hint.set("target", self._advance_any() and self._prev.text.upper())

        index_hint.set("expressions", self._parse_wrapped_id_vars())
        index_hints.append(index_hint)

    return index_hints or None
def _parse_table_parts(self, schema: bool = False) -> exp.Table:
    """Parse a dotted table reference into an ``exp.Table`` node.

    Args:
        schema: Forwarded to ``_parse_table_part`` for every part of the name.

    Returns:
        An ``exp.Table`` whose ``this`` / ``db`` / ``catalog`` arguments are filled
        from right to left, with any pivots parsed after the name attached.
    """
    catalog = db = None
    table = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if not catalog:
            # Shift the parts one slot to the left: table -> db -> catalog.
            catalog, db = db, table
            table = self._parse_table_part(schema=schema)
            continue

        # This allows nesting the table in arbitrarily many dot expressions if needed
        nested_part = self._parse_table_part(schema=schema)
        table = self.expression(exp.Dot, this=table, expression=nested_part)

    if not table:
        self.raise_error(f"Expected table name but got {self._curr}")

    return self.expression(
        exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
    )
def _parse_version(self) -> t.Optional[exp.Version]:
    """Parse a table snapshot/version clause into an ``exp.Version`` node.

    Returns:
        ``None`` unless a ``TIMESTAMP_SNAPSHOT`` or ``VERSION_SNAPSHOT`` token is
        matched. Otherwise a node whose ``this`` is ``"TIMESTAMP"`` or
        ``"VERSION"``, whose ``kind`` is the range keyword (the upper-cased
        FROM/BETWEEN token text, ``"CONTAINED IN"``, ``"ALL"`` or ``"AS OF"``) and
        whose ``expression`` is the parsed bound(s), if any.
    """
    snapshot_kind: t.Optional[str] = None
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        snapshot_kind = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        snapshot_kind = "VERSION"

    if snapshot_kind is None:
        return None

    expression: t.Optional[exp.Expression] = None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        lower = self._parse_bitwise()
        # FROM pairs with TO and BETWEEN with AND; either separator is accepted.
        self._match_texts(("TO", "AND"))
        upper = self._parse_bitwise()
        expression = self.expression(exp.Tuple, expressions=[lower, upper])
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        expression = self.expression(exp.Tuple, expressions=bounds)
    elif self._match(TokenType.ALL):
        kind = "ALL"
    else:
        # AS OF is the fallback; the keywords themselves are optional here.
        self._match_text_seq("AS", "OF")
        kind = "AS OF"
        expression = self._parse_type()

    return self.expression(exp.Version, this=snapshot_kind, expression=expression, kind=kind)
def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
    """Parse a table sampling clause into an ``exp.TableSample`` node.

    Args:
        as_modifier: When true, the ``USING SAMPLE`` keyword pair is accepted as an
            alternative introducer to the ``TABLE_SAMPLE`` token.

    Returns:
        The parsed node, or ``None`` when neither introducer is present.
    """
    if not self._match(TokenType.TABLE_SAMPLE) and not (
        as_modifier and self._match_text_seq("USING", "SAMPLE")
    ):
        return None

    bucket_numerator = None
    bucket_denominator = None
    bucket_field = None
    percent = None
    rows = None
    size = None
    seed = None

    # Keep the introducer text as written when it was the TABLE_SAMPLE token.
    kind = (
        self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
    )
    method = self._parse_var(tokens=(TokenType.ROW,))

    matched_l_paren = self._match(TokenType.L_PAREN)

    if self.TABLESAMPLE_CSV:
        num = None
        expressions = self._parse_csv(self._parse_primary)
    else:
        expressions = None
        num = (
            self._parse_factor()
            if self._match(TokenType.NUMBER, advance=False)
            else self._parse_primary()
        )

    # The sample amount is classified by what follows it: BUCKET x OUT OF y [ON col],
    # a percent sign / PERCENT keyword, ROWS, or nothing (a plain size).
    if self._match_text_seq("BUCKET"):
        bucket_numerator = self._parse_number()
        self._match_text_seq("OUT", "OF")
        bucket_denominator = self._parse_number()
        self._match(TokenType.ON)
        bucket_field = self._parse_field()
    elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
        percent = num
    elif self._match(TokenType.ROWS):
        rows = num
    elif num:
        size = num

    if matched_l_paren:
        self._match_r_paren()

    # Optional trailer: either `(method [, seed])` or `SEED (n)` / `REPEATABLE (n)`.
    if self._match(TokenType.L_PAREN):
        method = self._parse_var()
        seed = self._match(TokenType.COMMA) and self._parse_number()
        self._match_r_paren()
    elif self._match_texts(("SEED", "REPEATABLE")):
        seed = self._parse_wrapped(self._parse_number)

    return self.expression(
        exp.TableSample,
        expressions=expressions,
        method=method,
        bucket_numerator=bucket_numerator,
        bucket_denominator=bucket_denominator,
        bucket_field=bucket_field,
        percent=percent,
        rows=rows,
        size=size,
        seed=seed,
        kind=kind,
    )
def _parse_pivot(self) -> t.Optional[exp.Pivot]:
    """Parse a ``PIVOT (...)`` or ``UNPIVOT (...)`` clause into an ``exp.Pivot`` node.

    Returns:
        ``None`` when neither keyword is present, or when the keyword is not
        followed by ``(`` (in which case the parser is rewound to where it
        started). Otherwise the parsed node; for PIVOT the generated output
        column identifiers are stored under ``columns``.
    """
    start_index = self._index
    include_nulls = None

    if self._match(TokenType.PIVOT):
        is_unpivot = False
    elif self._match(TokenType.UNPIVOT):
        is_unpivot = True

        # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
        if self._match_text_seq("INCLUDE", "NULLS"):
            include_nulls = True
        elif self._match_text_seq("EXCLUDE", "NULLS"):
            include_nulls = False
    else:
        return None

    if not self._match(TokenType.L_PAREN):
        self._retreat(start_index)
        return None

    def _parse_aggregation() -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_function())

    # UNPIVOT lists plain columns; PIVOT lists (optionally aliased) function calls.
    item_parser = self._parse_column if is_unpivot else _parse_aggregation
    expressions = self._parse_csv(item_parser)

    if not expressions:
        self.raise_error("Failed to parse PIVOT's aggregation list")

    if not self._match(TokenType.FOR):
        self.raise_error("Expecting FOR")

    for_column = self._parse_column()

    if not self._match(TokenType.IN):
        self.raise_error("Expecting IN")

    in_clause = self._parse_in(for_column, alias=True)
    self._match_r_paren()

    pivot = self.expression(
        exp.Pivot,
        expressions=expressions,
        field=in_clause,
        unpivot=is_unpivot,
        include_nulls=include_nulls,
    )

    # An alias is only looked for when another PIVOT/UNPIVOT does not follow.
    if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
        pivot.set("alias", self._parse_table_alias())

    if is_unpivot:
        return pivot

    agg_names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

    # One output column per (IN value, aggregation) pair; the two name parts are
    # joined with "_" in the order selected by PREFIXED_PIVOT_COLUMNS, and an
    # empty aggregation name leaves just the value name.
    columns: t.List[exp.Expression] = []
    for in_value in pivot.args["field"].expressions:
        if self.IDENTIFY_PIVOT_STRINGS:
            value_name = in_value.sql()
        else:
            value_name = in_value.alias_or_name

        for agg_name in agg_names:
            if not agg_name:
                column_name = value_name
            elif self.PREFIXED_PIVOT_COLUMNS:
                column_name = f"{agg_name}_{value_name}"
            else:
                column_name = f"{value_name}_{agg_name}"

            columns.append(exp.to_identifier(column_name))

    pivot.set("columns", columns)
    return pivot
def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
    """Parse a hierarchical-query clause (``START WITH ... CONNECT BY ...``).

    ``START WITH`` may come either before or after ``CONNECT BY``.

    Args:
        skip_start_token: When true, no leading ``START WITH`` is looked for and
            parsing continues directly with ``CONNECT BY``.

    Returns:
        An ``exp.Connect`` node, or ``None`` when ``skip_start_token`` is false
        and ``START WITH`` is not matched at the current position.
    """
    if skip_start_token:
        start = None
    elif self._match(TokenType.START_WITH):
        start = self._parse_conjunction()
    else:
        return None

    self._match(TokenType.CONNECT_BY)

    # PRIOR is only meaningful inside the CONNECT BY condition, so its parser is
    # registered just for the duration of that parse.
    # NOTE(review): NO_PAREN_FUNCTION_PARSERS looks like a mapping shared beyond this
    # instance - confirm. The previous entry (if any) is restored in `finally` so that
    # a parse error, or a nested CONNECT BY, cannot leave the mapping in a modified state.
    parsers = self.NO_PAREN_FUNCTION_PARSERS
    missing = object()
    previous = parsers.get("PRIOR", missing)
    parsers["PRIOR"] = lambda self: self.expression(exp.Prior, this=self._parse_bitwise())
    try:
        connect = self._parse_conjunction()
    finally:
        if previous is missing:
            parsers.pop("PRIOR", None)
        else:
            parsers["PRIOR"] = previous

    if not start and self._match(TokenType.START_WITH):
        start = self._parse_conjunction()

    return self.expression(exp.Connect, start=start, connect=connect)
def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered:
    """Parse one ordering term: ``<expr> [ASC | DESC] [NULLS FIRST | NULLS LAST]``.

    Args:
        parse_method: Optional callable used to parse the ordered expression;
            ``_parse_conjunction`` is used when it is not given.

    Returns:
        An ``exp.Ordered`` node. ``desc`` is the DESC match result when DESC is
        present, ``False`` when ASC was written explicitly, and otherwise the
        falsy result of the failed ASC match. ``nulls_first`` is derived from
        ``self.NULL_ORDERING`` when no explicit NULLS clause is present.
    """
    if parse_method:
        this = parse_method()
    else:
        this = self._parse_conjunction()

    asc = self._match(TokenType.ASC)
    desc_match = self._match(TokenType.DESC)
    if desc_match:
        desc = desc_match
    else:
        # Distinguish an explicit ASC (False) from no direction at all (falsy match result).
        desc = False if asc else asc

    is_nulls_first = self._match_text_seq("NULLS", "FIRST")
    is_nulls_last = self._match_text_seq("NULLS", "LAST")

    nulls_first = is_nulls_first if is_nulls_first else False

    if not (is_nulls_first or is_nulls_last):
        # No explicit NULLS clause: nulls sort first when they are "small" and the
        # order is ascending, or when they are not "small" and the order is
        # descending - unless the ordering setting pins nulls last.
        ordering = self.NULL_ORDERING
        nulls_are_small = ordering == "nulls_are_small"
        if ordering != "nulls_are_last" and bool(desc) != nulls_are_small:
            nulls_first = True

    return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
def _parse_locks(self) -> t.List[exp.Lock]:
    """
    Parse zero or more consecutive row-locking clauses.

    Recognised openers are `FOR UPDATE`, `FOR SHARE` and `LOCK IN SHARE MODE`, each
    optionally followed by `OF <tables>` and one of `NOWAIT`, `WAIT <primary>` or
    `SKIP LOCKED`.

    Returns:
        The parsed `exp.Lock` nodes in source order; an empty list when no opener matches.
    """
    parsed = []

    def lock_target():
        return self._parse_table(schema=True)

    while True:
        for_update = self._match_text_seq("FOR", "UPDATE")
        if not for_update:
            shared = self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            )
            if not shared:
                return parsed

        targets = self._parse_csv(lock_target) if self._match_text_seq("OF") else None

        # wait: True -> NOWAIT, expression -> WAIT <expr>, False -> SKIP LOCKED, None -> unset
        wait: t.Optional[bool | exp.Expression]
        if self._match_text_seq("NOWAIT"):
            wait = True
        elif self._match_text_seq("WAIT"):
            wait = self._parse_primary()
        elif self._match_text_seq("SKIP", "LOCKED"):
            wait = False
        else:
            wait = None

        lock = self.expression(
            exp.Lock, update=bool(for_update), expressions=targets, wait=wait
        )
        parsed.append(lock)
def _parse_range(self) -> t.Optional[exp.Expression]:
    """
    Parse a bitwise-level operand followed by optional range-style predicates.

    Handles an optional NOT before the predicate, any operator registered in
    `RANGE_PARSERS`, the ISNULL / NOTNULL shorthands and a trailing IS clause.

    Returns:
        The resulting expression. When a range parser yields nothing, the plain
        operand is returned unchanged.
    """
    operand = self._parse_bitwise()
    negated = self._match(TokenType.NOT)

    def is_null(node):
        return self.expression(exp.Is, this=node, expression=exp.Null())

    if self._match_set(self.RANGE_PARSERS):
        range_parser = self.RANGE_PARSERS[self._prev.token_type]
        parsed = range_parser(self, operand)
        if not parsed:
            return operand
        operand = parsed
    elif self._match(TokenType.ISNULL):
        operand = is_null(operand)

    # Postgres supports ISNULL and NOTNULL for conditions.
    # https://blog.andreiavram.ro/postgresql-null-composite-type/
    if self._match(TokenType.NOTNULL):
        operand = self.expression(exp.Not, this=is_null(operand))

    if negated:
        operand = self.expression(exp.Not, this=operand)

    return self._parse_is(operand) if self._match(TokenType.IS) else operand
def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Wrap `this` in an `exp.Escape` node when an ESCAPE clause follows.

    Args:
        this: The already-parsed expression the ESCAPE clause would apply to.

    Returns:
        `exp.Escape(this, <string>)` if the ESCAPE token is matched, else `this` untouched.
    """
    if self._match(TokenType.ESCAPE):
        escape_string = self._parse_string()
        return self.expression(exp.Escape, this=this, expression=escape_string)

    return this
def _parse_bitwise(self) -> t.Optional[exp.Expression]:
    """
    Parse a left-associative chain of bitwise-level operators over terms.

    Handles the operators registered in `self.BITWISE`, the `??` (DQMARK) coalescing
    operator, and the shifts `<<` / `>>`, which are matched as two consecutive
    LT / GT tokens.

    Returns:
        The folded expression, or whatever `_parse_term` returned when no operator follows.
    """
    this = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            this = self.expression(
                self.BITWISE[self._prev.token_type],
                this=this,
                expression=self._parse_term(),
            )
        elif self._match(TokenType.DQMARK):
            # `expressions` is list-valued everywhere else in this file, so wrap the
            # single fallback operand instead of storing a bare expression there.
            fallback = self._parse_term()
            this = self.expression(
                exp.Coalesce,
                this=this,
                expressions=[fallback] if fallback is not None else [],
            )
        elif self._match_pair(TokenType.LT, TokenType.LT):
            this = self.expression(
                exp.BitwiseLeftShift, this=this, expression=self._parse_term()
            )
        elif self._match_pair(TokenType.GT, TokenType.GT):
            this = self.expression(
                exp.BitwiseRightShift, this=this, expression=self._parse_term()
            )
        else:
            break

    return this
def _parse_types(
    self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
) -> t.Optional[exp.Expression]:
    """
    Try to parse a data type starting at the current token.

    Args:
        check_func: When true, a type written with a parenthesised argument list is only
            accepted if a string follows it; otherwise the parser rewinds and None is
            returned (presumably so the caller can parse a function call instead - confirm).
        schema: Not used here except to be forwarded to recursive calls for nested types.
        allow_identifiers: When true and no type token matches, a VAR identifier is
            re-tokenized to see whether its name is a type name or a user-defined type.

    Returns:
        An `exp.DataType`, `exp.PseudoType`, `exp.ObjectIdentifier` or `exp.Interval`
        node, or None when no type could be parsed.

    Problems ("Unexpected identifier", "Expecting >", "Cannot convert ... to unsigned.")
    are reported through `self.raise_error`.
    """
    # Remember where we started so failed attempts can rewind.
    index = self._index

    # Optional "SYSUDTLIB." qualifier; the match result is stored as the DataType's `prefix`.
    prefix = self._match_text_seq("SYSUDTLIB", ".")

    if not self._match_set(self.TYPE_TOKENS):
        identifier = allow_identifiers and self._parse_id_var(
            any_token=False, tokens=(TokenType.VAR,)
        )

        if identifier:
            # Re-tokenize the identifier's name to find out whether it spells a type.
            tokens = self._tokenizer.tokenize(identifier.name)

            if len(tokens) != 1:
                self.raise_error("Unexpected identifier", self._prev)

            if tokens[0].token_type in self.TYPE_TOKENS:
                # Pretend the type token was matched directly so the code below can
                # read it from self._prev.
                self._prev = tokens[0]
            elif self.SUPPORTS_USER_DEFINED_TYPES:
                type_name = identifier.name

                # Collect a dotted name, one token per DOT.
                while self._match(TokenType.DOT):
                    type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                return exp.DataType.build(type_name, udt=True)
            else:
                return None
        else:
            return None

    type_token = self._prev.token_type

    if type_token == TokenType.PSEUDO_TYPE:
        return self.expression(exp.PseudoType, this=self._prev.text)

    if type_token == TokenType.OBJECT_IDENTIFIER:
        return self.expression(exp.ObjectIdentifier, this=self._prev.text)

    nested = type_token in self.NESTED_TYPE_TOKENS
    is_struct = type_token in self.STRUCT_TYPE_TOKENS
    expressions = None
    maybe_func = False

    # Parenthesised arguments: struct fields, nested types, enum values or type sizes.
    if self._match(TokenType.L_PAREN):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_types)
        elif nested:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )
        elif type_token in self.ENUM_TYPE_TOKENS:
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = self._parse_csv(self._parse_type_size)

        # Empty argument list or missing ")": not a type, rewind completely.
        if not expressions or not self._match(TokenType.R_PAREN):
            self._retreat(index)
            return None

        # The parenthesised form is ambiguous; see the check_func handling below.
        maybe_func = True

    this: t.Optional[exp.Expression] = None
    values: t.Optional[t.List[exp.Expression]] = None

    # Angle-bracket form for nested types, optionally followed by a bracketed or
    # parenthesised list of values.
    if nested and self._match(TokenType.LT):
        if is_struct:
            expressions = self._parse_csv(self._parse_struct_types)
        else:
            expressions = self._parse_csv(
                lambda: self._parse_types(
                    check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                )
            )

        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

        if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
            values = self._parse_csv(self._parse_conjunction)
            # NOTE(review): either closer is accepted regardless of which opener matched.
            self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

    if type_token in self.TIMESTAMPS:
        # A time-zone qualifier settles that this is a type, so maybe_func is cleared.
        if self._match_text_seq("WITH", "TIME", "ZONE"):
            maybe_func = False
            tz_type = (
                exp.DataType.Type.TIMETZ
                if type_token in self.TIMES
                else exp.DataType.Type.TIMESTAMPTZ
            )
            this = exp.DataType(this=tz_type, expressions=expressions)
        elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
            maybe_func = False
            this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
        elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
            maybe_func = False
    elif type_token == TokenType.INTERVAL:
        unit = self._parse_var()

        if self._match_text_seq("TO"):
            span = [exp.IntervalSpan(this=unit, expression=self._parse_var())]
        else:
            span = None

        # "<unit> TO <unit>" or no unit at all yields an INTERVAL data type;
        # a lone unit yields an exp.Interval carrying only that unit.
        if span or not unit:
            this = self.expression(
                exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
            )
        else:
            this = self.expression(exp.Interval, unit=unit)

    if maybe_func and check_func:
        # Peek for a string without consuming it; without one, give up on the type.
        index2 = self._index
        peek = self._parse_string()

        if not peek:
            self._retreat(index)
            return None

        self._retreat(index2)

    if not this:
        if self._match_text_seq("UNSIGNED"):
            unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
            if not unsigned_type_token:
                self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

            # Falls back to the signed token if raise_error returned instead of raising.
            type_token = unsigned_type_token or type_token

        this = exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    # Each trailing "[]" pair wraps the type parsed so far in an ARRAY type.
    while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
        this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

    return this
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """
    Parse a primary expression.

    Covers tokens registered in `PRIMARY_PARSERS` (adjacent string literals are folded
    into one `exp.Concat`), a number written with a leading dot, and parenthesised
    constructs (subquery, tuple or plain parenthesised expression).

    Returns:
        The parsed expression, or None when the current token starts none of these.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        kind = self._prev.token_type
        literal = self.PRIMARY_PARSERS[kind](self, self._prev)

        if kind != TokenType.STRING:
            return literal

        # Consecutive string tokens are concatenated.
        pieces = [literal]
        while self._match(TokenType.STRING):
            pieces.append(exp.Literal.string(self._prev.text))

        if len(pieces) == 1:
            return literal
        return self.expression(exp.Concat, expressions=pieces)

    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        # ".5" is normalised to "0.5"
        return exp.Literal.number(f"0.{self._prev.text}")

    if not self._match(TokenType.L_PAREN):
        return None

    leading_comments = self._prev_comments
    subselect = self._parse_select()
    items = [subselect] if subselect else self._parse_expressions()

    inner = self._parse_query_modifiers(seq_get(items, 0))

    if isinstance(inner, exp.Subqueryable):
        subquery = self._parse_subquery(this=inner, parse_alias=False)
        inner = self._parse_set_operations(subquery)
    elif len(items) > 1:
        inner = self.expression(exp.Tuple, expressions=items)
    else:
        inner = self.expression(exp.Paren, this=self._parse_set_operations(inner))

    if inner:
        inner.add_comments(leading_comments)

    self._match_r_paren(expression=inner)
    return inner
def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    """
    Parse a possibly dotted function name, optionally followed by a parameter list.

    Args:
        kind: Accepted for interface compatibility; not read by this implementation.

    Returns:
        The (dotted) name alone when no "(" follows, otherwise an
        `exp.UserDefinedFunction` with its parameters and `wrapped=True`.
    """
    name = self._parse_id_var()

    # Fold "a.b.c" into nested Dot nodes, left to right.
    while self._match(TokenType.DOT):
        member = self._parse_id_var()
        name = self.expression(exp.Dot, this=name, expression=member)

    if self._match(TokenType.L_PAREN):
        params = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=name, expressions=params, wrapped=True
        )

    return name
def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """
    Parse a function argument that may be a lambda.

    First tries `<id>` or `(<id>, ...)` followed by a token from `self.LAMBDAS`. If no
    lambda token follows, rewinds and parses either `DISTINCT <exprs>` or a regular
    select/expression, then applies the respect/ignore-nulls, ORDER BY and LIMIT parsers.

    Args:
        alias: Forwarded to `_parse_select_or_expression`.
    """
    start = self._index

    if self._match(TokenType.L_PAREN):
        params = t.cast(
            t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
        )
        if not self._match(TokenType.R_PAREN):
            self._retreat(start)
    else:
        params = [self._parse_id_var()]

    if self._match_set(self.LAMBDAS):
        build_lambda = self.LAMBDAS[self._prev.token_type]
        return build_lambda(self, params)

    # Not a lambda: forget the speculative parameter parse and start over.
    self._retreat(start)

    argument: t.Optional[exp.Expression]

    if self._match(TokenType.DISTINCT):
        distinct_items = self._parse_csv(self._parse_conjunction)
        argument = self.expression(exp.Distinct, expressions=distinct_items)
    else:
        argument = self._parse_select_or_expression(alias=alias)

    argument = self._parse_respect_or_ignore_nulls(argument)
    argument = self._parse_order(argument)
    return self._parse_limit(argument)
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """
    Parse the optional arguments of an auto-increment column constraint.

    Accepts either a wrapped list `(<start>, <increment>)` or the keyword form
    `START <start> [INCREMENT] <increment>`.

    Returns:
        `exp.GeneratedAsIdentityColumnConstraint` when both start and increment were
        parsed (and are truthy), otherwise a bare `exp.AutoIncrementColumnConstraint`.
    """
    if self._match(TokenType.L_PAREN, advance=False):
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        first = seq_get(bounds, 0)
        step = seq_get(bounds, 1)
    elif self._match_text_seq("START"):
        first = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        step = self._parse_bitwise()
    else:
        first = None
        step = None

    if not (first and step):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(start=first, increment=step)
def _parse_generated_as_identity(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
    """
    Parse the tail of a GENERATED column constraint.

    Grammar handled: `{BY DEFAULT [ON NULL] | [ALWAYS]} [AS] [IDENTITY] [( options )]`,
    where options are, in this order, START WITH, INCREMENT BY, MINVALUE, MAXVALUE and
    [NO] CYCLE. Without IDENTITY, the parenthesised content is additionally parsed as
    the generation expression.

    Returns:
        The populated `exp.GeneratedAsIdentityColumnConstraint`; its `this` argument is
        False for BY DEFAULT and True otherwise.
    """
    if self._match_text_seq("BY", "DEFAULT"):
        constraint = self.expression(
            exp.GeneratedAsIdentityColumnConstraint,
            this=False,
            on_null=self._match_pair(TokenType.ON, TokenType.NULL),
        )
    else:
        self._match_text_seq("ALWAYS")
        constraint = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)
    is_identity = self._match_text_seq("IDENTITY")

    if not self._match(TokenType.L_PAREN):
        return constraint

    if self._match(TokenType.START_WITH):
        constraint.set("start", self._parse_bitwise())

    # Keyword options that each take one bitwise-level value, tried in this fixed order.
    for words, arg_name in (
        (("INCREMENT", "BY"), "increment"),
        (("MINVALUE",), "minvalue"),
        (("MAXVALUE",), "maxvalue"),
    ):
        if self._match_text_seq(*words):
            constraint.set(arg_name, self._parse_bitwise())

    if self._match_text_seq("CYCLE"):
        constraint.set("cycle", True)
    elif self._match_text_seq("NO", "CYCLE"):
        constraint.set("cycle", False)

    if not is_identity:
        constraint.set("expression", self._parse_bitwise())

    self._match_r_paren()
    return constraint
def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parse a constraint that is not introduced by `CONSTRAINT <name>`.

    Args:
        constraints: Constraint keywords to accept; the keys of
            `self.CONSTRAINT_PARSERS` are used when this is empty or None.

    Returns:
        The result of the matching constraint parser, or None when the current token
        is not an accepted keyword or no parser is registered for it.
    """
    if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
        return None

    constraint = self._prev.text.upper()
    parser = self.CONSTRAINT_PARSERS.get(constraint)

    if parser is None:
        # `constraints` may name a keyword without a registered parser. Report it, and
        # if raise_error returns instead of raising, stop here rather than failing
        # with a KeyError on the lookup.
        self.raise_error(f"No parser found for schema constraint {constraint}.")
        return None

    return parser(self)
def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """
    Parse a REFERENCES clause: the referenced table (with schema) and key options.

    Args:
        match: When true, the REFERENCES token must be present and is consumed here;
            when false, parsing starts directly at the referenced table.

    Returns:
        An `exp.Reference` node, or None when `match` is true and REFERENCES is absent.
    """
    if match:
        if not self._match(TokenType.REFERENCES):
            return None

    referenced = self._parse_table(schema=True)
    key_options = self._parse_key_constraint_options()

    # `expressions` is always passed as None by this parser.
    return self.expression(
        exp.Reference, this=referenced, expressions=None, options=key_options
    )
def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """
    Parse what follows PRIMARY KEY, as a column constraint or a key definition.

    Args:
        wrapped_optional: Forwarded as `optional` to `_parse_wrapped_csv` for the key parts.
        in_props: When true, always parse the key-definition form, even if no "("
            follows.

    Returns:
        `exp.PrimaryKeyColumnConstraint` when `in_props` is false and no "(" follows;
        otherwise `exp.PrimaryKey` with its parts and key constraint options.
    """
    direction_given = self._match_set((TokenType.ASC, TokenType.DESC))
    if direction_given:
        desc = self._prev.token_type == TokenType.DESC
    else:
        # Keep the falsy match result itself to mean "no direction given".
        desc = direction_given

    has_key_list = in_props or self._match(TokenType.L_PAREN, advance=False)
    if not has_key_list:
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    key_parts = self._parse_wrapped_csv(
        self._parse_primary_key_part, optional=wrapped_optional
    )
    key_options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=key_parts, options=key_options)
]") 4034 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 4035 self.raise_error("Expected }") 4036 4037 self._add_comments(this) 4038 return self._parse_bracket(this) 4039 4040 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4041 if self._match(TokenType.COLON): 4042 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4043 return this 4044 4045 def _parse_case(self) -> t.Optional[exp.Expression]: 4046 ifs = [] 4047 default = None 4048 4049 comments = self._prev_comments 4050 expression = self._parse_conjunction() 4051 4052 while self._match(TokenType.WHEN): 4053 this = self._parse_conjunction() 4054 self._match(TokenType.THEN) 4055 then = self._parse_conjunction() 4056 ifs.append(self.expression(exp.If, this=this, true=then)) 4057 4058 if self._match(TokenType.ELSE): 4059 default = self._parse_conjunction() 4060 4061 if not self._match(TokenType.END): 4062 self.raise_error("Expected END after CASE", self._prev) 4063 4064 return self._parse_window( 4065 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 4066 ) 4067 4068 def _parse_if(self) -> t.Optional[exp.Expression]: 4069 if self._match(TokenType.L_PAREN): 4070 args = self._parse_csv(self._parse_conjunction) 4071 this = self.validate_expression(exp.If.from_arg_list(args), args) 4072 self._match_r_paren() 4073 else: 4074 index = self._index - 1 4075 condition = self._parse_conjunction() 4076 4077 if not condition: 4078 self._retreat(index) 4079 return None 4080 4081 self._match(TokenType.THEN) 4082 true = self._parse_conjunction() 4083 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4084 self._match(TokenType.END) 4085 this = self.expression(exp.If, this=condition, true=true, false=false) 4086 4087 return self._parse_window(this) 4088 4089 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4090 if not self._match_text_seq("VALUE", "FOR"): 
4091 self._retreat(self._index - 1) 4092 return None 4093 4094 return self.expression( 4095 exp.NextValueFor, 4096 this=self._parse_column(), 4097 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4098 ) 4099 4100 def _parse_extract(self) -> exp.Extract: 4101 this = self._parse_function() or self._parse_var() or self._parse_type() 4102 4103 if self._match(TokenType.FROM): 4104 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4105 4106 if not self._match(TokenType.COMMA): 4107 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4108 4109 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4110 4111 def _parse_any_value(self) -> exp.AnyValue: 4112 this = self._parse_lambda() 4113 is_max = None 4114 having = None 4115 4116 if self._match(TokenType.HAVING): 4117 self._match_texts(("MAX", "MIN")) 4118 is_max = self._prev.text == "MAX" 4119 having = self._parse_column() 4120 4121 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 4122 4123 def _parse_cast(self, strict: bool) -> exp.Expression: 4124 this = self._parse_conjunction() 4125 4126 if not self._match(TokenType.ALIAS): 4127 if self._match(TokenType.COMMA): 4128 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4129 4130 self.raise_error("Expected AS after CAST") 4131 4132 fmt = None 4133 to = self._parse_types() 4134 4135 if not to: 4136 self.raise_error("Expected TYPE after CAST") 4137 elif isinstance(to, exp.Identifier): 4138 to = exp.DataType.build(to.name, udt=True) 4139 elif to.this == exp.DataType.Type.CHAR: 4140 if self._match(TokenType.CHARACTER_SET): 4141 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4142 elif self._match(TokenType.FORMAT): 4143 fmt_string = self._parse_string() 4144 fmt = self._parse_at_time_zone(fmt_string) 4145 4146 if to.this in exp.DataType.TEMPORAL_TYPES: 4147 this = self.expression( 4148 
exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4149 this=this, 4150 format=exp.Literal.string( 4151 format_time( 4152 fmt_string.this if fmt_string else "", 4153 self.FORMAT_MAPPING or self.TIME_MAPPING, 4154 self.FORMAT_TRIE or self.TIME_TRIE, 4155 ) 4156 ), 4157 ) 4158 4159 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4160 this.set("zone", fmt.args["zone"]) 4161 4162 return this 4163 4164 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 4165 4166 def _parse_concat(self) -> t.Optional[exp.Expression]: 4167 args = self._parse_csv(self._parse_conjunction) 4168 if self.CONCAT_NULL_OUTPUTS_STRING: 4169 args = self._ensure_string_if_null(args) 4170 4171 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 4172 # we find such a call we replace it with its argument. 4173 if len(args) == 1: 4174 return args[0] 4175 4176 return self.expression( 4177 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4178 ) 4179 4180 def _parse_concat_ws(self) -> t.Optional[exp.Expression]: 4181 args = self._parse_csv(self._parse_conjunction) 4182 if len(args) < 2: 4183 return self.expression(exp.ConcatWs, expressions=args) 4184 delim, *values = args 4185 if self.CONCAT_NULL_OUTPUTS_STRING: 4186 values = self._ensure_string_if_null(values) 4187 4188 return self.expression(exp.ConcatWs, expressions=[delim] + values) 4189 4190 def _parse_string_agg(self) -> exp.Expression: 4191 if self._match(TokenType.DISTINCT): 4192 args: t.List[t.Optional[exp.Expression]] = [ 4193 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4194 ] 4195 if self._match(TokenType.COMMA): 4196 args.extend(self._parse_csv(self._parse_conjunction)) 4197 else: 4198 args = self._parse_csv(self._parse_conjunction) # type: ignore 4199 4200 index = self._index 4201 if not self._match(TokenType.R_PAREN) and args: 4202 # postgres: STRING_AGG([DISTINCT] expression, 
separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4203 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4204 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4205 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4206 4207 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4208 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4209 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 4210 if not self._match_text_seq("WITHIN", "GROUP"): 4211 self._retreat(index) 4212 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4213 4214 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4215 order = self._parse_order(this=seq_get(args, 0)) 4216 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4217 4218 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4219 this = self._parse_bitwise() 4220 4221 if self._match(TokenType.USING): 4222 to: t.Optional[exp.Expression] = self.expression( 4223 exp.CharacterSet, this=self._parse_var() 4224 ) 4225 elif self._match(TokenType.COMMA): 4226 to = self._parse_types() 4227 else: 4228 to = None 4229 4230 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4231 4232 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4233 """ 4234 There are generally two variants of the DECODE function: 4235 4236 - DECODE(bin, charset) 4237 - DECODE(expression, search, result [, search, result] ... [, default]) 4238 4239 The second variant will always be parsed into a CASE expression. Note that NULL 4240 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4241 instead of relying on pattern matching. 
4242 """ 4243 args = self._parse_csv(self._parse_conjunction) 4244 4245 if len(args) < 3: 4246 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4247 4248 expression, *expressions = args 4249 if not expression: 4250 return None 4251 4252 ifs = [] 4253 for search, result in zip(expressions[::2], expressions[1::2]): 4254 if not search or not result: 4255 return None 4256 4257 if isinstance(search, exp.Literal): 4258 ifs.append( 4259 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4260 ) 4261 elif isinstance(search, exp.Null): 4262 ifs.append( 4263 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4264 ) 4265 else: 4266 cond = exp.or_( 4267 exp.EQ(this=expression.copy(), expression=search), 4268 exp.and_( 4269 exp.Is(this=expression.copy(), expression=exp.Null()), 4270 exp.Is(this=search.copy(), expression=exp.Null()), 4271 copy=False, 4272 ), 4273 copy=False, 4274 ) 4275 ifs.append(exp.If(this=cond, true=result)) 4276 4277 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4278 4279 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4280 self._match_text_seq("KEY") 4281 key = self._parse_column() 4282 self._match_set((TokenType.COLON, TokenType.COMMA)) 4283 self._match_text_seq("VALUE") 4284 value = self._parse_bitwise() 4285 4286 if not key and not value: 4287 return None 4288 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4289 4290 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4291 if not this or not self._match_text_seq("FORMAT", "JSON"): 4292 return this 4293 4294 return self.expression(exp.FormatJson, this=this) 4295 4296 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 4297 # Parses the "X ON Y" syntax, i.e. 
NULL ON NULL (Oracle, T-SQL) 4298 for value in values: 4299 if self._match_text_seq(value, "ON", on): 4300 return f"{value} ON {on}" 4301 4302 return None 4303 4304 def _parse_json_object(self) -> exp.JSONObject: 4305 star = self._parse_star() 4306 expressions = ( 4307 [star] 4308 if star 4309 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 4310 ) 4311 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 4312 4313 unique_keys = None 4314 if self._match_text_seq("WITH", "UNIQUE"): 4315 unique_keys = True 4316 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4317 unique_keys = False 4318 4319 self._match_text_seq("KEYS") 4320 4321 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 4322 self._parse_type() 4323 ) 4324 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4325 4326 return self.expression( 4327 exp.JSONObject, 4328 expressions=expressions, 4329 null_handling=null_handling, 4330 unique_keys=unique_keys, 4331 return_type=return_type, 4332 encoding=encoding, 4333 ) 4334 4335 def _parse_logarithm(self) -> exp.Func: 4336 # Default argument order is base, expression 4337 args = self._parse_csv(self._parse_range) 4338 4339 if len(args) > 1: 4340 if not self.LOG_BASE_FIRST: 4341 args.reverse() 4342 return exp.Log.from_arg_list(args) 4343 4344 return self.expression( 4345 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4346 ) 4347 4348 def _parse_match_against(self) -> exp.MatchAgainst: 4349 expressions = self._parse_csv(self._parse_column) 4350 4351 self._match_text_seq(")", "AGAINST", "(") 4352 4353 this = self._parse_string() 4354 4355 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4356 modifier = "IN NATURAL LANGUAGE MODE" 4357 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4358 modifier = f"{modifier} WITH QUERY EXPANSION" 4359 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4360 modifier = "IN BOOLEAN MODE" 4361 elif 
self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4362 modifier = "WITH QUERY EXPANSION" 4363 else: 4364 modifier = None 4365 4366 return self.expression( 4367 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4368 ) 4369 4370 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4371 def _parse_open_json(self) -> exp.OpenJSON: 4372 this = self._parse_bitwise() 4373 path = self._match(TokenType.COMMA) and self._parse_string() 4374 4375 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4376 this = self._parse_field(any_token=True) 4377 kind = self._parse_types() 4378 path = self._parse_string() 4379 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4380 4381 return self.expression( 4382 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4383 ) 4384 4385 expressions = None 4386 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4387 self._match_l_paren() 4388 expressions = self._parse_csv(_parse_open_json_column_def) 4389 4390 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4391 4392 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4393 args = self._parse_csv(self._parse_bitwise) 4394 4395 if self._match(TokenType.IN): 4396 return self.expression( 4397 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4398 ) 4399 4400 if haystack_first: 4401 haystack = seq_get(args, 0) 4402 needle = seq_get(args, 1) 4403 else: 4404 needle = seq_get(args, 0) 4405 haystack = seq_get(args, 1) 4406 4407 return self.expression( 4408 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4409 ) 4410 4411 def _parse_predict(self) -> exp.Predict: 4412 self._match_text_seq("MODEL") 4413 this = self._parse_table() 4414 4415 self._match(TokenType.COMMA) 4416 self._match_text_seq("TABLE") 4417 4418 return self.expression( 4419 exp.Predict, 4420 this=this, 4421 
expression=self._parse_table(), 4422 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 4423 ) 4424 4425 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4426 args = self._parse_csv(self._parse_table) 4427 return exp.JoinHint(this=func_name.upper(), expressions=args) 4428 4429 def _parse_substring(self) -> exp.Substring: 4430 # Postgres supports the form: substring(string [from int] [for int]) 4431 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4432 4433 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4434 4435 if self._match(TokenType.FROM): 4436 args.append(self._parse_bitwise()) 4437 if self._match(TokenType.FOR): 4438 args.append(self._parse_bitwise()) 4439 4440 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4441 4442 def _parse_trim(self) -> exp.Trim: 4443 # https://www.w3resource.com/sql/character-functions/trim.php 4444 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4445 4446 position = None 4447 collation = None 4448 expression = None 4449 4450 if self._match_texts(self.TRIM_TYPES): 4451 position = self._prev.text.upper() 4452 4453 this = self._parse_bitwise() 4454 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4455 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 4456 expression = self._parse_bitwise() 4457 4458 if invert_order: 4459 this, expression = expression, this 4460 4461 if self._match(TokenType.COLLATE): 4462 collation = self._parse_bitwise() 4463 4464 return self.expression( 4465 exp.Trim, this=this, position=position, expression=expression, collation=collation 4466 ) 4467 4468 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4469 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4470 4471 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4472 return self._parse_window(self._parse_id_var(), alias=True) 4473 
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in `IgnoreNulls` / `RespectNulls` if the matching keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse the clauses that may trail a function call: FILTER (...), WITHIN GROUP (...),
        IGNORE | RESPECT NULLS and a window specification.

        Args:
            this: The expression the clauses apply to.
            alias: True when parsing a named window definition (`<name> [AS] (...)`), in
                which case no OVER-like keyword is required before the parenthesis.

        Returns:
            `this` (possibly wrapped in Filter / WithinGroup / IgnoreNulls / RespectNulls)
            when no window follows, otherwise an `exp.Window`.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        # No parenthesis: the window is referenced by name only
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse a window's partition clause followed by its order clause, as a pair."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """
        Parse one frame boundary of a window specification.

        Returns:
            A dict with "value" ("UNBOUNDED", "CURRENT ROW" or a parsed expression) and
            "side" (the text of a token from `WINDOW_SIDES`, or a falsy value if absent).
        """
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse an optional alias for `this`.

        Args:
            this: The expression being aliased.
            explicit: When true, an alias is only accepted if introduced by the alias
                keyword token; otherwise `this` is returned untouched.

        Returns:
            `exp.Aliases` for a parenthesized alias list, `exp.Alias` for a single alias,
            or `this` when no alias is found.
        """
        # Whether the alias keyword was seen; also passed on to _parse_id_var as any_token
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """
        Parse an identifier, falling back to treating other tokens as identifiers.

        Args:
            any_token: Whether any token accepted by `_advance_any` may serve as the name.
            tokens: Token types accepted as the name; defaults to `ID_VAR_TOKENS`.

        Returns:
            The parsed identifier, or None if nothing suitable was found.
        """
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            # A string token used as a name yields a quoted identifier
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string token via its primary parser, else try a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Turn a string token's text into a quoted identifier via `exp.to_identifier`."""
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a number token via its primary parser, else try a placeholder."""
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an identifier token into a quoted `Identifier`, else try a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """
        Parse a `Var` from a VAR token, from any token accepted by `_advance_any` (when
        `any_token` is set) or from one of `tokens`; otherwise try a placeholder.
        """
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        """
        Consume the current token unless its type is in `RESERVED_KEYWORDS`.

        Returns the consumed token, or None if nothing was consumed.
        """
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        """Parse a var, falling back to a string."""
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse any token from `NULL_TOKENS` via the NULL primary parser, else a placeholder."""
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE or FALSE token via its primary parser, else try a placeholder."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a star token via its primary parser, else try a placeholder."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name, optionally wrapped in braces, into a `Parameter`."""
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Try the parser registered in `PLACEHOLDER_PARSERS` for the current token type.

        If that parser yields nothing, the matched token is handed back by stepping the
        cursor back one position, and None is returned.
        """
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
        """
        Parse `EXCEPT (<columns>)` or `EXCEPT <column>` into a list of columns.

        Returns None when EXCEPT is absent or the unparenthesized column is missing.
        """
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        """
        Parse `REPLACE (<expressions>)` or `REPLACE <expression>` into a list.

        Returns None when REPLACE is absent or the unparenthesized expression is missing.
        """
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """
        Parse a `sep`-separated list by calling `parse_method` once per item.

        Items for which `parse_method` returns None are skipped, so the result may be
        shorter than the number of separators suggests (and may be empty).
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Called on the item parsed before the separator that was just matched
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """
        Parse a left-nested chain of binary operations.

        Args:
            parse_method: Parses one operand.
            expressions: Maps operator token types to the expression class to build.
        """
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of `_parse_id_var` items."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; see `_parse_wrapped` for `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """
        Run `parse_method` between a pair of parentheses.

        Args:
            parse_method: Parses the content inside the parentheses.
            optional: When false, a missing `(` is reported through `raise_error`.

        The closing paren is only expected when an opening one was consumed.
        """
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """
        Parse a SELECT, or else an expression that may be followed by set operations.

        Args:
            alias: Selects `_parse_expression` (True) or `_parse_conjunction` (False) for
                the non-SELECT case.
        """
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse a nested SELECT (without a subquery alias) plus set operations and modifiers."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """
        Parse `[<kind>] [TRANSACTION | WORK] [<mode> [, <mode> ...]]` into a `Transaction`.

        Each mode is a run of consecutive VAR tokens joined by single spaces.
        """
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """
        Parse what follows COMMIT or ROLLBACK:
        `[TRANSACTION | WORK] [TO [SAVEPOINT] <name>] [AND [NO] CHAIN]`.

        Returns a `Rollback` (keeping only the savepoint) if the previously consumed
        token was ROLLBACK, otherwise a `Commit` (keeping only the chain flag).
        """
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """
        Parse `ADD [COLUMN] [IF NOT EXISTS] <field def> [{FIRST | AFTER} <column>]`.

        Returns None when ADD is absent.
        """
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP action; a resulting `Drop` without a "kind" arg gets kind "COLUMN"."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse a comma-separated list of partitions into a `DropPartition`."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        """
        Parse one constraint of an ALTER TABLE ... ADD action.

        The previously consumed token determines the form: CONSTRAINT <name> [CHECK (...)
        [ENFORCED]], or a FOREIGN KEY / PRIMARY KEY definition.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """
        Parse the ADD action of ALTER TABLE: constraints, or one or more column definitions.
        """
        # One token back, so the retreat below restores the token consumed before this
        # call (presumably ADD - confirm against the caller).
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"):
            return self._parse_csv(self._parse_field_def)

        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        """
        Parse the ALTER [COLUMN] action of ALTER TABLE: DROP DEFAULT, SET DEFAULT <expr>,
        or `[SET DATA] [TYPE <type>] [COLLATE <term>] [USING <expr>]`.
        """
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the DROP action of ALTER TABLE: partitions, or else columns."""
        # One token back, so the retreat below restores the token consumed before this
        # call (presumably DROP - confirm against the caller).
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        """Parse `[TO] <table>` into a `RenameTable`."""
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        """
        Parse an ALTER statement.

        Only `ALTER TABLE [IF EXISTS] [ONLY] <table> <action>` with an action registered
        in `ALTER_PARSERS` that consumes all remaining tokens yields an `AlterTable`;
        everything else falls back to a generic `Command`.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        # The token just consumed names the action (looked up upper-cased)
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        """
        Parse `[INTO] <target> [alias] [USING] <source> [ON] <condition>` followed by
        any number of `WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] [THEN]
        {INSERT ... | UPDATE ... | DELETE}` clauses into a `Merge`.
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # False for BY TARGET; otherwise the result of matching BY SOURCE
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Dispatch to a parser from `SHOW_PARSERS`, or fall back to a generic `Command`."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """
        Parse one `<name> [= | TO] <value>` item of a SET statement into a `SetItem`.

        Args:
            kind: Stored on the `SetItem`; "GLOBAL" / "SESSION" followed by TRANSACTION is
                handed over to `_parse_set_transaction`.

        Returns:
            None, with the cursor restored, when there is no left-hand side or when
            `SET_REQUIRES_ASSIGNMENT_DELIMITER` is set and no delimiter was found.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """
        Parse `[TRANSACTION] <characteristic> [, ...]` into a `SetItem` of kind
        "TRANSACTION", with characteristics drawn from `TRANSACTION_CHARACTERISTICS`.
        """
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Dispatch to a parser from `SET_PARSERS`, or else parse a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """
        Parse the comma-separated items of a SET statement into a `Set`.

        If tokens remain afterwards, the cursor is restored and the statement is
        re-parsed as a generic `Command` instead.
        """
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        """
        Return a var for the first entry of `options` whose space-separated words match
        the upcoming tokens, or None if no option matches.
        """
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """
        Consume all remaining tokens and return the SQL text from `start` onwards as a
        `Command`, split into its first `len(start.text)` characters and the rest.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break

settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5098 self._match(TokenType.R_PAREN) 5099 5100 self._match_r_paren() 5101 5102 return self.expression( 5103 exp.DictProperty, 5104 this=this, 5105 kind=kind.this if kind else None, 5106 settings=settings, 5107 ) 5108 5109 def _parse_dict_range(self, this: str) -> exp.DictRange: 5110 self._match_l_paren() 5111 has_min = self._match_text_seq("MIN") 5112 if has_min: 5113 min = self._parse_var() or self._parse_primary() 5114 self._match_text_seq("MAX") 5115 max = self._parse_var() or self._parse_primary() 5116 else: 5117 max = self._parse_var() or self._parse_primary() 5118 min = exp.Literal.number(0) 5119 self._match_r_paren() 5120 return self.expression(exp.DictRange, this=this, min=min, max=max) 5121 5122 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5123 index = self._index 5124 expression = self._parse_column() 5125 if not self._match(TokenType.IN): 5126 self._retreat(index - 1) 5127 return None 5128 iterator = self._parse_column() 5129 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5130 return self.expression( 5131 exp.Comprehension, 5132 this=this, 5133 expression=expression, 5134 iterator=iterator, 5135 condition=condition, 5136 ) 5137 5138 def _find_parser( 5139 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5140 ) -> t.Optional[t.Callable]: 5141 if not self._curr: 5142 return None 5143 5144 index = self._index 5145 this = [] 5146 while True: 5147 # The current token might be multiple words 5148 curr = self._curr.text.upper() 5149 key = curr.split(" ") 5150 this.append(curr) 5151 5152 self._advance() 5153 result, trie = in_trie(trie, key) 5154 if result == TrieResult.FAILED: 5155 break 5156 5157 if result == TrieResult.EXISTS: 5158 subparser = parsers[" ".join(this)] 5159 return subparser 5160 5161 self._retreat(index) 5162 return None 5163 5164 def _match(self, token_type, advance=True, expression=None): 
5165 if not self._curr: 5166 return None 5167 5168 if self._curr.token_type == token_type: 5169 if advance: 5170 self._advance() 5171 self._add_comments(expression) 5172 return True 5173 5174 return None 5175 5176 def _match_set(self, types, advance=True): 5177 if not self._curr: 5178 return None 5179 5180 if self._curr.token_type in types: 5181 if advance: 5182 self._advance() 5183 return True 5184 5185 return None 5186 5187 def _match_pair(self, token_type_a, token_type_b, advance=True): 5188 if not self._curr or not self._next: 5189 return None 5190 5191 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5192 if advance: 5193 self._advance(2) 5194 return True 5195 5196 return None 5197 5198 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5199 if not self._match(TokenType.L_PAREN, expression=expression): 5200 self.raise_error("Expecting (") 5201 5202 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5203 if not self._match(TokenType.R_PAREN, expression=expression): 5204 self.raise_error("Expecting )") 5205 5206 def _match_texts(self, texts, advance=True): 5207 if self._curr and self._curr.text.upper() in texts: 5208 if advance: 5209 self._advance() 5210 return True 5211 return False 5212 5213 def _match_text_seq(self, *texts, advance=True): 5214 index = self._index 5215 for text in texts: 5216 if self._curr and self._curr.text.upper() == text: 5217 self._advance() 5218 else: 5219 self._retreat(index) 5220 return False 5221 5222 if not advance: 5223 self._retreat(index) 5224 5225 return True 5226 5227 @t.overload 5228 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5229 ... 5230 5231 @t.overload 5232 def _replace_columns_with_dots( 5233 self, this: t.Optional[exp.Expression] 5234 ) -> t.Optional[exp.Expression]: 5235 ... 
5236 5237 def _replace_columns_with_dots(self, this): 5238 if isinstance(this, exp.Dot): 5239 exp.replace_children(this, self._replace_columns_with_dots) 5240 elif isinstance(this, exp.Column): 5241 exp.replace_children(this, self._replace_columns_with_dots) 5242 table = this.args.get("table") 5243 this = ( 5244 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5245 ) 5246 5247 return this 5248 5249 def _replace_lambda( 5250 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5251 ) -> t.Optional[exp.Expression]: 5252 if not node: 5253 return node 5254 5255 for column in node.find_all(exp.Column): 5256 if column.parts[0].name in lambda_variables: 5257 dot_or_id = column.to_dot() if column.table else column.this 5258 parent = column.parent 5259 5260 while isinstance(parent, exp.Dot): 5261 if not isinstance(parent.parent, exp.Dot): 5262 parent.replace(dot_or_id) 5263 break 5264 parent = parent.parent 5265 else: 5266 if column is node: 5267 node = dot_or_id 5268 else: 5269 column.replace(dot_or_id) 5270 return node 5271 5272 def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]: 5273 return [ 5274 exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string("")) 5275 for value in values 5276 if value 5277 ]
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a map expression from a flat ``[key, value, key, value, ...]`` list.

    A single argument whose ``is_star`` attribute is truthy is wrapped in an
    ``exp.StarMap``. Otherwise the items at even positions become the keys and
    the items at odd positions become the values of an ``exp.VarMap``, each
    collected into an ``exp.Array``.

    Args:
        args: The flat list of alternating key and value expressions.

    Returns:
        An ``exp.StarMap`` for the lone-star case, else an ``exp.VarMap``.

    Raises:
        IndexError: If ``args`` has an odd length (and is not a lone star),
            because the trailing key has no value to pair with.
    """
    if len(args) == 1:
        only = args[0]
        if only.is_star:
            return exp.StarMap(this=only)

    # Index explicitly instead of slicing so that a dangling key still fails
    # loudly rather than being silently dropped.
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 STRUCT_TYPE_TOKENS = { 107 TokenType.NESTED, 108 TokenType.STRUCT, 109 } 110 111 NESTED_TYPE_TOKENS = { 112 TokenType.ARRAY, 113 TokenType.LOWCARDINALITY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 *STRUCT_TYPE_TOKENS, 117 } 118 119 ENUM_TYPE_TOKENS = { 120 TokenType.ENUM, 121 TokenType.ENUM8, 122 TokenType.ENUM16, 123 } 124 125 TYPE_TOKENS = { 126 TokenType.BIT, 127 TokenType.BOOLEAN, 128 
TokenType.TINYINT, 129 TokenType.UTINYINT, 130 TokenType.SMALLINT, 131 TokenType.USMALLINT, 132 TokenType.INT, 133 TokenType.UINT, 134 TokenType.BIGINT, 135 TokenType.UBIGINT, 136 TokenType.INT128, 137 TokenType.UINT128, 138 TokenType.INT256, 139 TokenType.UINT256, 140 TokenType.MEDIUMINT, 141 TokenType.UMEDIUMINT, 142 TokenType.FIXEDSTRING, 143 TokenType.FLOAT, 144 TokenType.DOUBLE, 145 TokenType.CHAR, 146 TokenType.NCHAR, 147 TokenType.VARCHAR, 148 TokenType.NVARCHAR, 149 TokenType.TEXT, 150 TokenType.MEDIUMTEXT, 151 TokenType.LONGTEXT, 152 TokenType.MEDIUMBLOB, 153 TokenType.LONGBLOB, 154 TokenType.BINARY, 155 TokenType.VARBINARY, 156 TokenType.JSON, 157 TokenType.JSONB, 158 TokenType.INTERVAL, 159 TokenType.TINYBLOB, 160 TokenType.TINYTEXT, 161 TokenType.TIME, 162 TokenType.TIMETZ, 163 TokenType.TIMESTAMP, 164 TokenType.TIMESTAMPTZ, 165 TokenType.TIMESTAMPLTZ, 166 TokenType.DATETIME, 167 TokenType.DATETIME64, 168 TokenType.DATE, 169 TokenType.INT4RANGE, 170 TokenType.INT4MULTIRANGE, 171 TokenType.INT8RANGE, 172 TokenType.INT8MULTIRANGE, 173 TokenType.NUMRANGE, 174 TokenType.NUMMULTIRANGE, 175 TokenType.TSRANGE, 176 TokenType.TSMULTIRANGE, 177 TokenType.TSTZRANGE, 178 TokenType.TSTZMULTIRANGE, 179 TokenType.DATERANGE, 180 TokenType.DATEMULTIRANGE, 181 TokenType.DECIMAL, 182 TokenType.UDECIMAL, 183 TokenType.BIGDECIMAL, 184 TokenType.UUID, 185 TokenType.GEOGRAPHY, 186 TokenType.GEOMETRY, 187 TokenType.HLLSKETCH, 188 TokenType.HSTORE, 189 TokenType.PSEUDO_TYPE, 190 TokenType.SUPER, 191 TokenType.SERIAL, 192 TokenType.SMALLSERIAL, 193 TokenType.BIGSERIAL, 194 TokenType.XML, 195 TokenType.YEAR, 196 TokenType.UNIQUEIDENTIFIER, 197 TokenType.USERDEFINED, 198 TokenType.MONEY, 199 TokenType.SMALLMONEY, 200 TokenType.ROWVERSION, 201 TokenType.IMAGE, 202 TokenType.VARIANT, 203 TokenType.OBJECT, 204 TokenType.OBJECT_IDENTIFIER, 205 TokenType.INET, 206 TokenType.IPADDRESS, 207 TokenType.IPPREFIX, 208 TokenType.UNKNOWN, 209 TokenType.NULL, 210 *ENUM_TYPE_TOKENS, 211 
*NESTED_TYPE_TOKENS, 212 } 213 214 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 215 TokenType.BIGINT: TokenType.UBIGINT, 216 TokenType.INT: TokenType.UINT, 217 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 218 TokenType.SMALLINT: TokenType.USMALLINT, 219 TokenType.TINYINT: TokenType.UTINYINT, 220 TokenType.DECIMAL: TokenType.UDECIMAL, 221 } 222 223 SUBQUERY_PREDICATES = { 224 TokenType.ANY: exp.Any, 225 TokenType.ALL: exp.All, 226 TokenType.EXISTS: exp.Exists, 227 TokenType.SOME: exp.Any, 228 } 229 230 RESERVED_KEYWORDS = { 231 *Tokenizer.SINGLE_TOKENS.values(), 232 TokenType.SELECT, 233 } 234 235 DB_CREATABLES = { 236 TokenType.DATABASE, 237 TokenType.SCHEMA, 238 TokenType.TABLE, 239 TokenType.VIEW, 240 TokenType.DICTIONARY, 241 } 242 243 CREATABLES = { 244 TokenType.COLUMN, 245 TokenType.FUNCTION, 246 TokenType.INDEX, 247 TokenType.PROCEDURE, 248 *DB_CREATABLES, 249 } 250 251 # Tokens that can represent identifiers 252 ID_VAR_TOKENS = { 253 TokenType.VAR, 254 TokenType.ANTI, 255 TokenType.APPLY, 256 TokenType.ASC, 257 TokenType.AUTO_INCREMENT, 258 TokenType.BEGIN, 259 TokenType.CACHE, 260 TokenType.CASE, 261 TokenType.COLLATE, 262 TokenType.COMMAND, 263 TokenType.COMMENT, 264 TokenType.COMMIT, 265 TokenType.CONSTRAINT, 266 TokenType.DEFAULT, 267 TokenType.DELETE, 268 TokenType.DESC, 269 TokenType.DESCRIBE, 270 TokenType.DICTIONARY, 271 TokenType.DIV, 272 TokenType.END, 273 TokenType.EXECUTE, 274 TokenType.ESCAPE, 275 TokenType.FALSE, 276 TokenType.FIRST, 277 TokenType.FILTER, 278 TokenType.FORMAT, 279 TokenType.FULL, 280 TokenType.IS, 281 TokenType.ISNULL, 282 TokenType.INTERVAL, 283 TokenType.KEEP, 284 TokenType.KILL, 285 TokenType.LEFT, 286 TokenType.LOAD, 287 TokenType.MERGE, 288 TokenType.NATURAL, 289 TokenType.NEXT, 290 TokenType.OFFSET, 291 TokenType.ORDINALITY, 292 TokenType.OVERLAPS, 293 TokenType.OVERWRITE, 294 TokenType.PARTITION, 295 TokenType.PERCENT, 296 TokenType.PIVOT, 297 TokenType.PRAGMA, 298 TokenType.RANGE, 299 TokenType.REFERENCES, 300 TokenType.RIGHT, 301 
TokenType.ROW, 302 TokenType.ROWS, 303 TokenType.SEMI, 304 TokenType.SET, 305 TokenType.SETTINGS, 306 TokenType.SHOW, 307 TokenType.TEMPORARY, 308 TokenType.TOP, 309 TokenType.TRUE, 310 TokenType.UNIQUE, 311 TokenType.UNPIVOT, 312 TokenType.UPDATE, 313 TokenType.VOLATILE, 314 TokenType.WINDOW, 315 *CREATABLES, 316 *SUBQUERY_PREDICATES, 317 *TYPE_TOKENS, 318 *NO_PAREN_FUNCTIONS, 319 } 320 321 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 322 323 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 324 TokenType.ANTI, 325 TokenType.APPLY, 326 TokenType.ASOF, 327 TokenType.FULL, 328 TokenType.LEFT, 329 TokenType.LOCK, 330 TokenType.NATURAL, 331 TokenType.OFFSET, 332 TokenType.RIGHT, 333 TokenType.SEMI, 334 TokenType.WINDOW, 335 } 336 337 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 338 339 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 340 341 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 342 343 FUNC_TOKENS = { 344 TokenType.COLLATE, 345 TokenType.COMMAND, 346 TokenType.CURRENT_DATE, 347 TokenType.CURRENT_DATETIME, 348 TokenType.CURRENT_TIMESTAMP, 349 TokenType.CURRENT_TIME, 350 TokenType.CURRENT_USER, 351 TokenType.FILTER, 352 TokenType.FIRST, 353 TokenType.FORMAT, 354 TokenType.GLOB, 355 TokenType.IDENTIFIER, 356 TokenType.INDEX, 357 TokenType.ISNULL, 358 TokenType.ILIKE, 359 TokenType.INSERT, 360 TokenType.LIKE, 361 TokenType.MERGE, 362 TokenType.OFFSET, 363 TokenType.PRIMARY_KEY, 364 TokenType.RANGE, 365 TokenType.REPLACE, 366 TokenType.RLIKE, 367 TokenType.ROW, 368 TokenType.UNNEST, 369 TokenType.VAR, 370 TokenType.LEFT, 371 TokenType.RIGHT, 372 TokenType.DATE, 373 TokenType.DATETIME, 374 TokenType.TABLE, 375 TokenType.TIMESTAMP, 376 TokenType.TIMESTAMPTZ, 377 TokenType.WINDOW, 378 TokenType.XOR, 379 *TYPE_TOKENS, 380 *SUBQUERY_PREDICATES, 381 } 382 383 CONJUNCTION = { 384 TokenType.AND: exp.And, 385 TokenType.OR: exp.Or, 386 } 387 388 EQUALITY = { 389 TokenType.EQ: exp.EQ, 390 TokenType.NEQ: exp.NEQ, 391 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 
392 } 393 394 COMPARISON = { 395 TokenType.GT: exp.GT, 396 TokenType.GTE: exp.GTE, 397 TokenType.LT: exp.LT, 398 TokenType.LTE: exp.LTE, 399 } 400 401 BITWISE = { 402 TokenType.AMP: exp.BitwiseAnd, 403 TokenType.CARET: exp.BitwiseXor, 404 TokenType.PIPE: exp.BitwiseOr, 405 TokenType.DPIPE: exp.DPipe, 406 } 407 408 TERM = { 409 TokenType.DASH: exp.Sub, 410 TokenType.PLUS: exp.Add, 411 TokenType.MOD: exp.Mod, 412 TokenType.COLLATE: exp.Collate, 413 } 414 415 FACTOR = { 416 TokenType.DIV: exp.IntDiv, 417 TokenType.LR_ARROW: exp.Distance, 418 TokenType.SLASH: exp.Div, 419 TokenType.STAR: exp.Mul, 420 } 421 422 TIMES = { 423 TokenType.TIME, 424 TokenType.TIMETZ, 425 } 426 427 TIMESTAMPS = { 428 TokenType.TIMESTAMP, 429 TokenType.TIMESTAMPTZ, 430 TokenType.TIMESTAMPLTZ, 431 *TIMES, 432 } 433 434 SET_OPERATIONS = { 435 TokenType.UNION, 436 TokenType.INTERSECT, 437 TokenType.EXCEPT, 438 } 439 440 JOIN_METHODS = { 441 TokenType.NATURAL, 442 TokenType.ASOF, 443 } 444 445 JOIN_SIDES = { 446 TokenType.LEFT, 447 TokenType.RIGHT, 448 TokenType.FULL, 449 } 450 451 JOIN_KINDS = { 452 TokenType.INNER, 453 TokenType.OUTER, 454 TokenType.CROSS, 455 TokenType.SEMI, 456 TokenType.ANTI, 457 } 458 459 JOIN_HINTS: t.Set[str] = set() 460 461 LAMBDAS = { 462 TokenType.ARROW: lambda self, expressions: self.expression( 463 exp.Lambda, 464 this=self._replace_lambda( 465 self._parse_conjunction(), 466 {node.name for node in expressions}, 467 ), 468 expressions=expressions, 469 ), 470 TokenType.FARROW: lambda self, expressions: self.expression( 471 exp.Kwarg, 472 this=exp.var(expressions[0].name), 473 expression=self._parse_conjunction(), 474 ), 475 } 476 477 COLUMN_OPERATORS = { 478 TokenType.DOT: None, 479 TokenType.DCOLON: lambda self, this, to: self.expression( 480 exp.Cast if self.STRICT_CAST else exp.TryCast, 481 this=this, 482 to=to, 483 ), 484 TokenType.ARROW: lambda self, this, path: self.expression( 485 exp.JSONExtract, 486 this=this, 487 expression=path, 488 ), 489 TokenType.DARROW: 
lambda self, this, path: self.expression( 490 exp.JSONExtractScalar, 491 this=this, 492 expression=path, 493 ), 494 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 495 exp.JSONBExtract, 496 this=this, 497 expression=path, 498 ), 499 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 500 exp.JSONBExtractScalar, 501 this=this, 502 expression=path, 503 ), 504 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 505 exp.JSONBContains, 506 this=this, 507 expression=key, 508 ), 509 } 510 511 EXPRESSION_PARSERS = { 512 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 513 exp.Column: lambda self: self._parse_column(), 514 exp.Condition: lambda self: self._parse_conjunction(), 515 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 516 exp.Expression: lambda self: self._parse_statement(), 517 exp.From: lambda self: self._parse_from(), 518 exp.Group: lambda self: self._parse_group(), 519 exp.Having: lambda self: self._parse_having(), 520 exp.Identifier: lambda self: self._parse_id_var(), 521 exp.Join: lambda self: self._parse_join(), 522 exp.Lambda: lambda self: self._parse_lambda(), 523 exp.Lateral: lambda self: self._parse_lateral(), 524 exp.Limit: lambda self: self._parse_limit(), 525 exp.Offset: lambda self: self._parse_offset(), 526 exp.Order: lambda self: self._parse_order(), 527 exp.Ordered: lambda self: self._parse_ordered(), 528 exp.Properties: lambda self: self._parse_properties(), 529 exp.Qualify: lambda self: self._parse_qualify(), 530 exp.Returning: lambda self: self._parse_returning(), 531 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 532 exp.Table: lambda self: self._parse_table_parts(), 533 exp.TableAlias: lambda self: self._parse_table_alias(), 534 exp.Where: lambda self: self._parse_where(), 535 exp.Window: lambda self: self._parse_named_window(), 536 exp.With: lambda self: self._parse_with(), 537 "JOIN_TYPE": lambda self: 
self._parse_join_parts(), 538 } 539 540 STATEMENT_PARSERS = { 541 TokenType.ALTER: lambda self: self._parse_alter(), 542 TokenType.BEGIN: lambda self: self._parse_transaction(), 543 TokenType.CACHE: lambda self: self._parse_cache(), 544 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 545 TokenType.COMMENT: lambda self: self._parse_comment(), 546 TokenType.CREATE: lambda self: self._parse_create(), 547 TokenType.DELETE: lambda self: self._parse_delete(), 548 TokenType.DESC: lambda self: self._parse_describe(), 549 TokenType.DESCRIBE: lambda self: self._parse_describe(), 550 TokenType.DROP: lambda self: self._parse_drop(), 551 TokenType.INSERT: lambda self: self._parse_insert(), 552 TokenType.KILL: lambda self: self._parse_kill(), 553 TokenType.LOAD: lambda self: self._parse_load(), 554 TokenType.MERGE: lambda self: self._parse_merge(), 555 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 556 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 557 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 558 TokenType.SET: lambda self: self._parse_set(), 559 TokenType.UNCACHE: lambda self: self._parse_uncache(), 560 TokenType.UPDATE: lambda self: self._parse_update(), 561 TokenType.USE: lambda self: self.expression( 562 exp.Use, 563 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 564 and exp.var(self._prev.text), 565 this=self._parse_table(schema=False), 566 ), 567 } 568 569 UNARY_PARSERS = { 570 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 571 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 572 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 573 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 574 } 575 576 PRIMARY_PARSERS = { 577 TokenType.STRING: lambda self, token: self.expression( 578 exp.Literal, this=token.text, 
is_string=True 579 ), 580 TokenType.NUMBER: lambda self, token: self.expression( 581 exp.Literal, this=token.text, is_string=False 582 ), 583 TokenType.STAR: lambda self, _: self.expression( 584 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 585 ), 586 TokenType.NULL: lambda self, _: self.expression(exp.Null), 587 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 588 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 589 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 590 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 591 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 592 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 593 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 594 exp.National, this=token.text 595 ), 596 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 597 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 598 exp.RawString, this=token.text 599 ), 600 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 601 } 602 603 PLACEHOLDER_PARSERS = { 604 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 605 TokenType.PARAMETER: lambda self: self._parse_parameter(), 606 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 607 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 608 else None, 609 } 610 611 RANGE_PARSERS = { 612 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 613 TokenType.GLOB: binary_range_parser(exp.Glob), 614 TokenType.ILIKE: binary_range_parser(exp.ILike), 615 TokenType.IN: lambda self, this: self._parse_in(this), 616 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 617 TokenType.IS: lambda self, this: self._parse_is(this), 
618 TokenType.LIKE: binary_range_parser(exp.Like), 619 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 620 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 621 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 622 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 623 } 624 625 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 626 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 627 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 628 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 629 "CHARACTER SET": lambda self: self._parse_character_set(), 630 "CHECKSUM": lambda self: self._parse_checksum(), 631 "CLUSTER BY": lambda self: self._parse_cluster(), 632 "CLUSTERED": lambda self: self._parse_clustered_by(), 633 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 634 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 635 "COPY": lambda self: self._parse_copy_property(), 636 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 637 "DEFINER": lambda self: self._parse_definer(), 638 "DETERMINISTIC": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 640 ), 641 "DISTKEY": lambda self: self._parse_distkey(), 642 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 643 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 644 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 645 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 646 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 647 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "FREESPACE": lambda self: self._parse_freespace(), 649 "HEAP": lambda self: self.expression(exp.HeapProperty), 650 "IMMUTABLE": lambda self: 
self.expression( 651 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 652 ), 653 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 654 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 655 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 656 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 657 "LIKE": lambda self: self._parse_create_like(), 658 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 659 "LOCK": lambda self: self._parse_locking(), 660 "LOCKING": lambda self: self._parse_locking(), 661 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 662 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 663 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 664 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 665 "NO": lambda self: self._parse_no_property(), 666 "ON": lambda self: self._parse_on_property(), 667 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 668 "PARTITION BY": lambda self: self._parse_partitioned_by(), 669 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 670 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 671 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 672 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 673 "RETURNS": lambda self: self._parse_returns(), 674 "ROW": lambda self: self._parse_row(), 675 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 676 "SAMPLE": lambda self: self.expression( 677 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 678 ), 679 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 680 "SETTINGS": lambda self: self.expression( 681 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 682 ), 683 "SORTKEY": lambda self: self._parse_sortkey(), 684 "SOURCE": lambda 
self: self._parse_dict_property(this="SOURCE"), 685 "STABLE": lambda self: self.expression( 686 exp.StabilityProperty, this=exp.Literal.string("STABLE") 687 ), 688 "STORED": lambda self: self._parse_stored(), 689 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 690 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 691 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 692 "TO": lambda self: self._parse_to_table(), 693 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 694 "TTL": lambda self: self._parse_ttl(), 695 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 696 "VOLATILE": lambda self: self._parse_volatile_property(), 697 "WITH": lambda self: self._parse_with_property(), 698 } 699 700 CONSTRAINT_PARSERS = { 701 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 702 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 703 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 704 "CHARACTER SET": lambda self: self.expression( 705 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 706 ), 707 "CHECK": lambda self: self.expression( 708 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 709 ), 710 "COLLATE": lambda self: self.expression( 711 exp.CollateColumnConstraint, this=self._parse_var() 712 ), 713 "COMMENT": lambda self: self.expression( 714 exp.CommentColumnConstraint, this=self._parse_string() 715 ), 716 "COMPRESS": lambda self: self._parse_compress(), 717 "CLUSTERED": lambda self: self.expression( 718 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 719 ), 720 "NONCLUSTERED": lambda self: self.expression( 721 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 722 ), 723 "DEFAULT": lambda self: self.expression( 724 exp.DefaultColumnConstraint, this=self._parse_bitwise() 725 ), 726 "ENCODE": lambda 
self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 727 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 728 "FORMAT": lambda self: self.expression( 729 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 730 ), 731 "GENERATED": lambda self: self._parse_generated_as_identity(), 732 "IDENTITY": lambda self: self._parse_auto_increment(), 733 "INLINE": lambda self: self._parse_inline(), 734 "LIKE": lambda self: self._parse_create_like(), 735 "NOT": lambda self: self._parse_not_constraint(), 736 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 737 "ON": lambda self: ( 738 self._match(TokenType.UPDATE) 739 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 740 ) 741 or self.expression(exp.OnProperty, this=self._parse_id_var()), 742 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 743 "PRIMARY KEY": lambda self: self._parse_primary_key(), 744 "REFERENCES": lambda self: self._parse_references(match=False), 745 "TITLE": lambda self: self.expression( 746 exp.TitleColumnConstraint, this=self._parse_var_or_string() 747 ), 748 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 749 "UNIQUE": lambda self: self._parse_unique(), 750 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 751 "WITH": lambda self: self.expression( 752 exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property) 753 ), 754 } 755 756 ALTER_PARSERS = { 757 "ADD": lambda self: self._parse_alter_table_add(), 758 "ALTER": lambda self: self._parse_alter_table_alter(), 759 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 760 "DROP": lambda self: self._parse_alter_table_drop(), 761 "RENAME": lambda self: self._parse_alter_table_rename(), 762 } 763 764 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 765 766 
# Keyword text -> callable(parser) for constructs handled by a dedicated
# _parse_* method (ANY is built inline from a bitwise expression).
# NOTE(review): the name suggests these are parsed without a parenthesised
# argument list -- confirm at the call site.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "IF": lambda self: self._parse_if(),
    "NEXT": lambda self: self._parse_next_value_for(),
}

INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

# Upper-case function name -> callable(parser) that delegates to a dedicated
# _parse_* method. CAST and CONVERT forward the class-level STRICT_CAST flag,
# while SAFE_CAST, TRY_CAST and TRY_CONVERT always pass False.
FUNCTION_PARSERS = {
    "ANY_VALUE": lambda self: self._parse_any_value(),
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CONCAT": lambda self: self._parse_concat(),
    "CONCAT_WS": lambda self: self._parse_concat_ws(),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "LOG": lambda self: self._parse_logarithm(),
    "MATCH": lambda self: self._parse_match_against(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "POSITION": lambda self: self._parse_position(),
    "PREDICT": lambda self: self._parse_predict(),
    "SAFE_CAST": lambda self: self._parse_cast(False),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False),
    "TRY_CONVERT": lambda self: self._parse_convert(False),
}

# Token type -> callable(parser) returning a (key, parsed node) pair.
# Several token types share a key: LIMIT/FETCH -> "limit", FOR/LOCK -> "locks",
# TABLE_SAMPLE/USING -> "sample", CONNECT_BY/START_WITH -> "connect".
# NOTE(review): the key is presumably the argument name the node is stored
# under on the query expression -- confirm in _parse_query_modifiers.
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    # CONNECT_BY skips the start token; START_WITH uses the parser's default.
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}
# Value forwarded to _parse_cast / _parse_convert by the CAST and CONVERT
# entries of FUNCTION_PARSERS (TRY_CAST, SAFE_CAST and TRY_CONVERT pass False).
STRICT_CAST = True

# A NULL arg in CONCAT yields NULL by default
CONCAT_NULL_OUTPUTS_STRING = False

# NOTE(review): presumably consulted by the pivot parsing code -- confirm.
PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

# NOTE(review): presumably consulted by _parse_logarithm (the "LOG" entry of
# FUNCTION_PARSERS) -- confirm.
LOG_BASE_FIRST = True
LOG_DEFAULTS_TO_LN = False

# Whether or not ADD is present for each column added by ALTER TABLE
ALTER_TABLE_ADD_COLUMN_KEYWORD = True

# Whether or not the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# Whether or not the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced by the first parser that succeeds.

    Raises:
        TypeError: If a requested expression type has no entry in EXPRESSION_PARSERS.
        ParseError: If no requested type could be parsed, including the case
            where `expression_types` is empty.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Remember which target type this failure belongs to.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Chain to the last failure when there is one. With no attempts at all,
    # `errors[-1]` would raise IndexError and mask the ParseError below.
    cause = errors[-1] if errors else None
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
def check_errors(self) -> None:
    """
    Reports the errors recorded so far according to `self.error_level`.

    With ErrorLevel.WARN each recorded error is logged; with ErrorLevel.RAISE a single
    ParseError combining them is raised if any were recorded. Other levels do nothing.
    """
    level = self.error_level
    recorded = self.errors

    if level == ErrorLevel.WARN:
        for problem in recorded:
            logger.error(str(problem))
        return

    if level != ErrorLevel.RAISE or not recorded:
        return

    # The message is built from at most `max_errors` of the recorded errors.
    message = concat_messages(recorded, self.max_errors)
    raise ParseError(message, errors=merge_errors(recorded))
1096 1097 Returns: 1098 The target expression. 1099 """ 1100 instance = exp_class(**kwargs) 1101 instance.add_comments(comments) if comments else self._add_comments(instance) 1102 return self.validate_expression(instance) 1103 1104 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1105 if expression and self._prev_comments: 1106 expression.add_comments(self._prev_comments) 1107 self._prev_comments = None 1108 1109 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1110 """ 1111 Validates an Expression, making sure that all its mandatory arguments are set. 1112 1113 Args: 1114 expression: The expression to validate. 1115 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1116 1117 Returns: 1118 The validated expression. 1119 """ 1120 if self.error_level != ErrorLevel.IGNORE: 1121 for error_message in expression.error_messages(args): 1122 self.raise_error(error_message) 1123 1124 return expression 1125 1126 def _find_sql(self, start: Token, end: Token) -> str: 1127 return self.sql[start.start : end.end + 1] 1128 1129 def _advance(self, times: int = 1) -> None: 1130 self._index += times 1131 self._curr = seq_get(self._tokens, self._index) 1132 self._next = seq_get(self._tokens, self._index + 1) 1133 1134 if self._index > 0: 1135 self._prev = self._tokens[self._index - 1] 1136 self._prev_comments = self._prev.comments 1137 else: 1138 self._prev = None 1139 self._prev_comments = None 1140 1141 def _retreat(self, index: int) -> None: 1142 if index != self._index: 1143 self._advance(index - self._index) 1144 1145 def _parse_command(self) -> exp.Command: 1146 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1147 1148 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1149 start = self._prev 1150 exists = self._parse_exists() if allow_exists else None 1151 1152 self._match(TokenType.ON) 1153 1154 kind = 
def _parse_to_table(self) -> exp.ToTableProperty:
    """Wraps the table parts parsed with `schema=True` in a ToTableProperty."""
    return self.expression(
        exp.ToTableProperty,
        this=self._parse_table_parts(schema=True),
    )
def _parse_create(self) -> exp.Create | exp.Command:
    """
    Parses a CREATE-style statement whose leading keyword was already consumed.

    The consumed keyword is `self._prev`; its text is compared with "REPLACE" to
    detect the replace form, which is otherwise signalled by a following OR REPLACE.

    Returns:
        An exp.Create node, or the result of `_parse_as_command(start)` when no
        token from CREATABLES can be matched, either directly or after a
        non-empty run of properties.
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev
    comments = self._prev_comments

    replace = start.text.upper() == "REPLACE" or self._match_pair(
        TokenType.OR, TokenType.REPLACE
    )
    unique = self._match(TokenType.UNIQUE)

    # For TABLE FUNCTION, step over TABLE so that FUNCTION is matched as the kind.
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    exists = self._parse_exists(not_=True)
    this = None
    expression: t.Optional[exp.Expression] = None
    indexes = None
    no_schema_binding = None
    begin = None
    end = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        # Merge newly parsed properties into the `properties` accumulated so far.
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)

        if self._match(TokenType.COMMAND):
            expression = self._parse_as_command(self._prev)
        else:
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")

            if self._match(TokenType.STRING, advance=False):
                # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                expression = self._parse_string()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_statement()

            end = self._match_text_seq("END")

            if return_:
                expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        this = self._parse_index(index=self._parse_id_var())
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(schema=True)

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        expression = self._parse_ddl_select()

        if create_token.token_type == TokenType.TABLE:
            # exp.Properties.Location.POST_EXPRESSION
            extend_props(self._parse_properties())

            indexes = []
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_INDEX
                extend_props(self._parse_properties())

                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone,
                this=clone,
                when=when,
                kind=clone_kind,
                shallow=shallow,
                expression=clone_expression,
                copy=copy,
            )

    return self.expression(
        exp.Create,
        comments=comments,
        this=this,
        kind=create_token.text,
        replace=replace,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        end=end,
        clone=clone,
    )
return None 1451 1452 return self.expression( 1453 exp.Property, 1454 this=key.to_dot() if isinstance(key, exp.Column) else key, 1455 value=self._parse_column() or self._parse_var(any_token=True), 1456 ) 1457 1458 def _parse_stored(self) -> exp.FileFormatProperty: 1459 self._match(TokenType.ALIAS) 1460 1461 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1462 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1463 1464 return self.expression( 1465 exp.FileFormatProperty, 1466 this=self.expression( 1467 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1468 ) 1469 if input_format or output_format 1470 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1471 ) 1472 1473 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1474 self._match(TokenType.EQ) 1475 self._match(TokenType.ALIAS) 1476 return self.expression(exp_class, this=self._parse_field()) 1477 1478 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1479 properties = [] 1480 while True: 1481 if before: 1482 prop = self._parse_property_before() 1483 else: 1484 prop = self._parse_property() 1485 1486 if not prop: 1487 break 1488 for p in ensure_list(prop): 1489 properties.append(p) 1490 1491 if properties: 1492 return self.expression(exp.Properties, expressions=properties) 1493 1494 return None 1495 1496 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1497 return self.expression( 1498 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1499 ) 1500 1501 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1502 if self._index >= 2: 1503 pre_volatile_token = self._tokens[self._index - 2] 1504 else: 1505 pre_volatile_token = None 1506 1507 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1508 return exp.VolatileProperty() 
def _parse_with_property(
    self,
) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
    """
    Parses what follows a WITH property keyword.

    Tried in order: a parenthesised list of properties, JOURNAL, DATA / NO DATA,
    and finally the isolated-loading form. Returns None when none of the first
    three match and there is no next token.
    """
    # The parenthesis is only peeked at; _parse_wrapped_csv consumes it.
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_property)

    if self._match_text_seq("JOURNAL"):
        return self._parse_withjournaltable()

    for keywords, negated in ((("DATA",), False), (("NO", "DATA"), True)):
        if self._match_text_seq(*keywords):
            return self._parse_withdata(no=negated)

    return self._parse_withisolatedloading() if self._next else None
self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1568 1569 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1570 self._match_text_seq("BY") 1571 1572 self._match_l_paren() 1573 expressions = self._parse_csv(self._parse_column) 1574 self._match_r_paren() 1575 1576 if self._match_text_seq("SORTED", "BY"): 1577 self._match_l_paren() 1578 sorted_by = self._parse_csv(self._parse_ordered) 1579 self._match_r_paren() 1580 else: 1581 sorted_by = None 1582 1583 self._match(TokenType.INTO) 1584 buckets = self._parse_number() 1585 self._match_text_seq("BUCKETS") 1586 1587 return self.expression( 1588 exp.ClusteredByProperty, 1589 expressions=expressions, 1590 sorted_by=sorted_by, 1591 buckets=buckets, 1592 ) 1593 1594 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1595 if not self._match_text_seq("GRANTS"): 1596 self._retreat(self._index - 1) 1597 return None 1598 1599 return self.expression(exp.CopyGrantsProperty) 1600 1601 def _parse_freespace(self) -> exp.FreespaceProperty: 1602 self._match(TokenType.EQ) 1603 return self.expression( 1604 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1605 ) 1606 1607 def _parse_mergeblockratio( 1608 self, no: bool = False, default: bool = False 1609 ) -> exp.MergeBlockRatioProperty: 1610 if self._match(TokenType.EQ): 1611 return self.expression( 1612 exp.MergeBlockRatioProperty, 1613 this=self._parse_number(), 1614 percent=self._match(TokenType.PERCENT), 1615 ) 1616 1617 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1618 1619 def _parse_datablocksize( 1620 self, 1621 default: t.Optional[bool] = None, 1622 minimum: t.Optional[bool] = None, 1623 maximum: t.Optional[bool] = None, 1624 ) -> exp.DataBlocksizeProperty: 1625 self._match(TokenType.EQ) 1626 size = self._parse_number() 1627 1628 units = None 1629 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 1630 units = self._prev.text 1631 1632 return 
def _parse_locking(self) -> exp.LockingProperty:
    """
    Parses a locking property: an optional object kind (with a table name for
    DATABASE, TABLE and VIEW), an optional FOR / IN, an optional lock type and
    an optional OVERRIDE.
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        # DATABASE is matched by text rather than by token type.
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # ROW (and a missing kind) is not followed by an object name.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are recorded as EXCLUSIVE.
        lock_type = "EXCLUSIVE"
    else:
        for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(keyword):
                lock_type = keyword
                break

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=self._match_text_seq("OVERRIDE"),
    )
self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1763 1764 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1765 table = self._parse_table(schema=True) 1766 1767 options = [] 1768 while self._match_texts(("INCLUDING", "EXCLUDING")): 1769 this = self._prev.text.upper() 1770 1771 id_var = self._parse_id_var() 1772 if not id_var: 1773 return None 1774 1775 options.append( 1776 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1777 ) 1778 1779 return self.expression(exp.LikeProperty, this=table, expressions=options) 1780 1781 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1782 return self.expression( 1783 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1784 ) 1785 1786 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1787 self._match(TokenType.EQ) 1788 return self.expression( 1789 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1790 ) 1791 1792 def _parse_returns(self) -> exp.ReturnsProperty: 1793 value: t.Optional[exp.Expression] 1794 is_table = self._match(TokenType.TABLE) 1795 1796 if is_table: 1797 if self._match(TokenType.LT): 1798 value = self.expression( 1799 exp.Schema, 1800 this="TABLE", 1801 expressions=self._parse_csv(self._parse_struct_types), 1802 ) 1803 if not self._match(TokenType.GT): 1804 self.raise_error("Expecting >") 1805 else: 1806 value = self._parse_schema(exp.var("TABLE")) 1807 else: 1808 value = self._parse_types() 1809 1810 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1811 1812 def _parse_describe(self) -> exp.Describe: 1813 kind = self._match_set(self.CREATABLES) and self._prev.text 1814 this = self._parse_table(schema=True) 1815 properties = self._parse_properties() 1816 expressions = properties.expressions if properties else None 1817 return self.expression(exp.Describe, this=this, kind=kind, expressions=expressions) 1818 
def _parse_insert(self) -> exp.Insert:
    """Build an `exp.Insert`.

    The target is either a DIRECTORY (with optional LOCAL and row format) or
    a table, optionally preceded by `OR <alternative>`, INTO and TABLE.
    RETURNING is accepted both right after the target and after the
    conflict clause.
    """
    comments = ensure_list(self._prev_comments)
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None

    if self._match_text_seq("DIRECTORY"):
        this: t.Optional[exp.Expression] = self.expression(
            exp.Directory,
            this=self._parse_var_or_string(),
            local=local,
            row_format=self._parse_row_format(match_row=True),
        )
    else:
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        this = self._parse_table(schema=True)

    returning = self._parse_returning()

    # Keyword arguments are evaluated in order, so this order is the order in
    # which the remaining clauses are consumed.
    return self.expression(
        exp.Insert,
        comments=comments,
        this=this,
        by_name=self._match_text_seq("BY", "NAME"),
        exists=self._parse_exists(),
        partition=self._parse_partition(),
        where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
        and self._parse_conjunction(),
        expression=self._parse_ddl_select(),
        conflict=self._parse_on_conflict(),
        returning=returning or self._parse_returning(),
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
    )


def _parse_kill(self) -> exp.Kill:
    """Build an `exp.Kill` from an optional CONNECTION / QUERY kind and a primary."""
    kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

    return self.expression(
        exp.Kill,
        this=self._parse_primary(),
        kind=kind,
    )


def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
    """Build an `exp.OnConflict` for ON CONFLICT or ON DUPLICATE KEY.

    Returns None when neither keyword sequence is present.
    """
    conflict = self._match_text_seq("ON", "CONFLICT")
    duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not conflict and not duplicate:
        return None

    nothing = None
    expressions = None
    key = None
    constraint = None

    if conflict:
        if self._match_text_seq("ON", "CONSTRAINT"):
            constraint = self._parse_id_var()
        else:
            key = self._parse_csv(self._parse_value)

    self._match_text_seq("DO")
    if self._match_text_seq("NOTHING"):
        nothing = True
    else:
        self._match(TokenType.UPDATE)
        self._match(TokenType.SET)
        expressions = self._parse_csv(self._parse_equality)

    return self.expression(
        exp.OnConflict,
        duplicate=duplicate,
        expressions=expressions,
        nothing=nothing,
        key=key,
        constraint=constraint,
    )


def _parse_returning(self) -> t.Optional[exp.Returning]:
    """Build an `exp.Returning` (with optional INTO target), or None if absent."""
    if not self._match(TokenType.RETURNING):
        return None
    return self.expression(
        exp.Returning,
        expressions=self._parse_csv(self._parse_expression),
        into=self._match(TokenType.INTO) and self._parse_table_part(),
    )


def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parse a row format if the next token is FORMAT, else return None."""
    if not self._match(TokenType.FORMAT):
        return None
    return self._parse_row_format()


def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Parse a SERDE or DELIMITED row format.

    Args:
        match_row: when True, ROW FORMAT must be matched first; None is
            returned if it is not there.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        this = self._parse_string()

        serde_properties = None
        if self._match(TokenType.SERDE_PROPERTIES):
            serde_properties = self.expression(
                exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
            )

        return self.expression(
            exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
        )

    self._match_text_seq("DELIMITED")

    kwargs = {}

    # Each sub-clause is optional, but they are only recognised in this order.
    if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
        kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
    if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
        kwargs["collection_items"] = self._parse_string()
    if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
        kwargs["map_keys"] = self._parse_string()
    if self._match_text_seq("LINES", "TERMINATED", "BY"):
        kwargs["lines"] = self._parse_string()
    if self._match_text_seq("NULL", "DEFINED", "AS"):
        kwargs["null"] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore


def _parse_load(self) -> exp.LoadData | exp.Command:
    """Build an `exp.LoadData` for LOAD DATA; otherwise fall back to a command."""
    if self._match_text_seq("DATA"):
        local = self._match_text_seq("LOCAL")
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
    return self._parse_as_command(self._prev)


def _parse_delete(self) -> exp.Delete:
    """Build an `exp.Delete`.

    RETURNING is accepted both before FROM and after WHERE.
    """
    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    tables = None
    comments = self._prev_comments
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    returning = self._parse_returning()

    return self.expression(
        exp.Delete,
        comments=comments,
        tables=tables,
        this=self._match(TokenType.FROM) and self._parse_table(joins=True),
        using=self._match(TokenType.USING) and self._parse_table(joins=True),
        where=self._parse_where(),
        returning=returning or self._parse_returning(),
        limit=self._parse_limit(),
    )

def _parse_update(self) -> exp.Update: 1999 comments = self._prev_comments 2000 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2001 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2002 returning = self._parse_returning() 2003 return self.expression( 2004 exp.Update, 2005 comments=comments, 2006 **{ # type: ignore 2007 "this": this, 2008 "expressions": expressions, 2009 "from": self._parse_from(joins=True), 2010 "where": self._parse_where(), 2011 "returning": returning or self._parse_returning(), 2012 "order": self._parse_order(), 2013 "limit": self._parse_limit(), 2014 }, 2015 ) 2016 2017 def _parse_uncache(self) -> exp.Uncache: 2018 if not self._match(TokenType.TABLE): 2019 self.raise_error("Expecting TABLE after UNCACHE") 2020 2021 return self.expression( 2022 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2023 ) 2024 2025 def _parse_cache(self) -> exp.Cache: 2026 lazy = self._match_text_seq("LAZY") 2027 self._match(TokenType.TABLE) 2028 table = self._parse_table(schema=True) 2029 2030 options = [] 2031 if self._match_text_seq("OPTIONS"): 2032 self._match_l_paren() 2033 k = self._parse_string() 2034 self._match(TokenType.EQ) 2035 v = self._parse_string() 2036 options = [k, v] 2037 self._match_r_paren() 2038 2039 self._match(TokenType.ALIAS) 2040 return self.expression( 2041 exp.Cache, 2042 this=table, 2043 lazy=lazy, 2044 options=options, 2045 expression=self._parse_select(nested=True), 2046 ) 2047 2048 def _parse_partition(self) -> t.Optional[exp.Partition]: 2049 if not self._match(TokenType.PARTITION): 2050 return None 2051 2052 return self.expression( 2053 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2054 ) 2055 2056 def _parse_value(self) -> exp.Tuple: 2057 if self._match(TokenType.L_PAREN): 2058 expressions = self._parse_csv(self._parse_conjunction) 2059 self._match_r_paren() 2060 return self.expression(exp.Tuple, 
expressions=expressions) 2061 2062 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 2063 # https://prestodb.io/docs/current/sql/values.html 2064 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 2065 2066 def _parse_projections(self) -> t.List[exp.Expression]: 2067 return self._parse_expressions() 2068 2069 def _parse_select( 2070 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 2071 ) -> t.Optional[exp.Expression]: 2072 cte = self._parse_with() 2073 2074 if cte: 2075 this = self._parse_statement() 2076 2077 if not this: 2078 self.raise_error("Failed to parse any statement following CTE") 2079 return cte 2080 2081 if "with" in this.arg_types: 2082 this.set("with", cte) 2083 else: 2084 self.raise_error(f"{this.key} does not support CTE") 2085 this = cte 2086 2087 return this 2088 2089 # duckdb supports leading with FROM x 2090 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2091 2092 if self._match(TokenType.SELECT): 2093 comments = self._prev_comments 2094 2095 hint = self._parse_hint() 2096 all_ = self._match(TokenType.ALL) 2097 distinct = self._match_set(self.DISTINCT_TOKENS) 2098 2099 kind = ( 2100 self._match(TokenType.ALIAS) 2101 and self._match_texts(("STRUCT", "VALUE")) 2102 and self._prev.text 2103 ) 2104 2105 if distinct: 2106 distinct = self.expression( 2107 exp.Distinct, 2108 on=self._parse_value() if self._match(TokenType.ON) else None, 2109 ) 2110 2111 if all_ and distinct: 2112 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2113 2114 limit = self._parse_limit(top=True) 2115 projections = self._parse_projections() 2116 2117 this = self.expression( 2118 exp.Select, 2119 kind=kind, 2120 hint=hint, 2121 distinct=distinct, 2122 expressions=projections, 2123 limit=limit, 2124 ) 2125 this.comments = comments 2126 2127 into = self._parse_into() 2128 if into: 2129 this.set("into", into) 2130 2131 if not from_: 2132 from_ = 
self._parse_from() 2133 2134 if from_: 2135 this.set("from", from_) 2136 2137 this = self._parse_query_modifiers(this) 2138 elif (table or nested) and self._match(TokenType.L_PAREN): 2139 if self._match(TokenType.PIVOT): 2140 this = self._parse_simplified_pivot() 2141 elif self._match(TokenType.FROM): 2142 this = exp.select("*").from_( 2143 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2144 ) 2145 else: 2146 this = self._parse_table() if table else self._parse_select(nested=True) 2147 this = self._parse_set_operations(self._parse_query_modifiers(this)) 2148 2149 self._match_r_paren() 2150 2151 # We return early here so that the UNION isn't attached to the subquery by the 2152 # following call to _parse_set_operations, but instead becomes the parent node 2153 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2154 elif self._match(TokenType.VALUES): 2155 this = self.expression( 2156 exp.Values, 2157 expressions=self._parse_csv(self._parse_value), 2158 alias=self._parse_table_alias(), 2159 ) 2160 elif from_: 2161 this = exp.select("*").from_(from_.this, copy=False) 2162 else: 2163 this = None 2164 2165 return self._parse_set_operations(this) 2166 2167 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2168 if not skip_with_token and not self._match(TokenType.WITH): 2169 return None 2170 2171 comments = self._prev_comments 2172 recursive = self._match(TokenType.RECURSIVE) 2173 2174 expressions = [] 2175 while True: 2176 expressions.append(self._parse_cte()) 2177 2178 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2179 break 2180 else: 2181 self._match(TokenType.WITH) 2182 2183 return self.expression( 2184 exp.With, comments=comments, expressions=expressions, recursive=recursive 2185 ) 2186 2187 def _parse_cte(self) -> exp.CTE: 2188 alias = self._parse_table_alias() 2189 if not alias or not alias.this: 2190 self.raise_error("Expected CTE to have alias") 2191 2192 
self._match(TokenType.ALIAS) 2193 return self.expression( 2194 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2195 ) 2196 2197 def _parse_table_alias( 2198 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2199 ) -> t.Optional[exp.TableAlias]: 2200 any_token = self._match(TokenType.ALIAS) 2201 alias = ( 2202 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2203 or self._parse_string_as_identifier() 2204 ) 2205 2206 index = self._index 2207 if self._match(TokenType.L_PAREN): 2208 columns = self._parse_csv(self._parse_function_parameter) 2209 self._match_r_paren() if columns else self._retreat(index) 2210 else: 2211 columns = None 2212 2213 if not alias and not columns: 2214 return None 2215 2216 return self.expression(exp.TableAlias, this=alias, columns=columns) 2217 2218 def _parse_subquery( 2219 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2220 ) -> t.Optional[exp.Subquery]: 2221 if not this: 2222 return None 2223 2224 return self.expression( 2225 exp.Subquery, 2226 this=this, 2227 pivots=self._parse_pivots(), 2228 alias=self._parse_table_alias() if parse_alias else None, 2229 ) 2230 2231 def _parse_query_modifiers( 2232 self, this: t.Optional[exp.Expression] 2233 ) -> t.Optional[exp.Expression]: 2234 if isinstance(this, self.MODIFIABLES): 2235 for join in iter(self._parse_join, None): 2236 this.append("joins", join) 2237 for lateral in iter(self._parse_lateral, None): 2238 this.append("laterals", lateral) 2239 2240 while True: 2241 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2242 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2243 key, expression = parser(self) 2244 2245 if expression: 2246 this.set(key, expression) 2247 if key == "limit": 2248 offset = expression.args.pop("offset", None) 2249 if offset: 2250 this.set("offset", exp.Offset(expression=offset)) 2251 continue 2252 break 2253 return this 2254 2255 def _parse_hint(self) -> 
t.Optional[exp.Hint]: 2256 if self._match(TokenType.HINT): 2257 hints = [] 2258 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2259 hints.extend(hint) 2260 2261 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2262 self.raise_error("Expected */ after HINT") 2263 2264 return self.expression(exp.Hint, expressions=hints) 2265 2266 return None 2267 2268 def _parse_into(self) -> t.Optional[exp.Into]: 2269 if not self._match(TokenType.INTO): 2270 return None 2271 2272 temp = self._match(TokenType.TEMPORARY) 2273 unlogged = self._match_text_seq("UNLOGGED") 2274 self._match(TokenType.TABLE) 2275 2276 return self.expression( 2277 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2278 ) 2279 2280 def _parse_from( 2281 self, joins: bool = False, skip_from_token: bool = False 2282 ) -> t.Optional[exp.From]: 2283 if not skip_from_token and not self._match(TokenType.FROM): 2284 return None 2285 2286 return self.expression( 2287 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2288 ) 2289 2290 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2291 if not self._match(TokenType.MATCH_RECOGNIZE): 2292 return None 2293 2294 self._match_l_paren() 2295 2296 partition = self._parse_partition_by() 2297 order = self._parse_order() 2298 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2299 2300 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2301 rows = exp.var("ONE ROW PER MATCH") 2302 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2303 text = "ALL ROWS PER MATCH" 2304 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2305 text += f" SHOW EMPTY MATCHES" 2306 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2307 text += f" OMIT EMPTY MATCHES" 2308 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2309 text += f" WITH UNMATCHED ROWS" 2310 rows = exp.var(text) 2311 else: 2312 rows = None 2313 2314 if 
self._match_text_seq("AFTER", "MATCH", "SKIP"): 2315 text = "AFTER MATCH SKIP" 2316 if self._match_text_seq("PAST", "LAST", "ROW"): 2317 text += f" PAST LAST ROW" 2318 elif self._match_text_seq("TO", "NEXT", "ROW"): 2319 text += f" TO NEXT ROW" 2320 elif self._match_text_seq("TO", "FIRST"): 2321 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2322 elif self._match_text_seq("TO", "LAST"): 2323 text += f" TO LAST {self._advance_any().text}" # type: ignore 2324 after = exp.var(text) 2325 else: 2326 after = None 2327 2328 if self._match_text_seq("PATTERN"): 2329 self._match_l_paren() 2330 2331 if not self._curr: 2332 self.raise_error("Expecting )", self._curr) 2333 2334 paren = 1 2335 start = self._curr 2336 2337 while self._curr and paren > 0: 2338 if self._curr.token_type == TokenType.L_PAREN: 2339 paren += 1 2340 if self._curr.token_type == TokenType.R_PAREN: 2341 paren -= 1 2342 2343 end = self._prev 2344 self._advance() 2345 2346 if paren > 0: 2347 self.raise_error("Expecting )", self._curr) 2348 2349 pattern = exp.var(self._find_sql(start, end)) 2350 else: 2351 pattern = None 2352 2353 define = ( 2354 self._parse_csv( 2355 lambda: self.expression( 2356 exp.Alias, 2357 alias=self._parse_id_var(any_token=True), 2358 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2359 ) 2360 ) 2361 if self._match_text_seq("DEFINE") 2362 else None 2363 ) 2364 2365 self._match_r_paren() 2366 2367 return self.expression( 2368 exp.MatchRecognize, 2369 partition_by=partition, 2370 order=order, 2371 measures=measures, 2372 rows=rows, 2373 after=after, 2374 pattern=pattern, 2375 define=define, 2376 alias=self._parse_table_alias(), 2377 ) 2378 2379 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2380 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2381 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2382 2383 if outer_apply or cross_apply: 2384 this = self._parse_select(table=True) 2385 view = None 2386 outer = not 
cross_apply 2387 elif self._match(TokenType.LATERAL): 2388 this = self._parse_select(table=True) 2389 view = self._match(TokenType.VIEW) 2390 outer = self._match(TokenType.OUTER) 2391 else: 2392 return None 2393 2394 if not this: 2395 this = ( 2396 self._parse_unnest() 2397 or self._parse_function() 2398 or self._parse_id_var(any_token=False) 2399 ) 2400 2401 while self._match(TokenType.DOT): 2402 this = exp.Dot( 2403 this=this, 2404 expression=self._parse_function() or self._parse_id_var(any_token=False), 2405 ) 2406 2407 if view: 2408 table = self._parse_id_var(any_token=False) 2409 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2410 table_alias: t.Optional[exp.TableAlias] = self.expression( 2411 exp.TableAlias, this=table, columns=columns 2412 ) 2413 elif isinstance(this, exp.Subquery) and this.alias: 2414 # Ensures parity between the Subquery's and the Lateral's "alias" args 2415 table_alias = this.args["alias"].copy() 2416 else: 2417 table_alias = self._parse_table_alias() 2418 2419 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2420 2421 def _parse_join_parts( 2422 self, 2423 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2424 return ( 2425 self._match_set(self.JOIN_METHODS) and self._prev, 2426 self._match_set(self.JOIN_SIDES) and self._prev, 2427 self._match_set(self.JOIN_KINDS) and self._prev, 2428 ) 2429 2430 def _parse_join( 2431 self, skip_join_token: bool = False, parse_bracket: bool = False 2432 ) -> t.Optional[exp.Join]: 2433 if self._match(TokenType.COMMA): 2434 return self.expression(exp.Join, this=self._parse_table()) 2435 2436 index = self._index 2437 method, side, kind = self._parse_join_parts() 2438 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2439 join = self._match(TokenType.JOIN) 2440 2441 if not skip_join_token and not join: 2442 self._retreat(index) 2443 kind = None 2444 method = None 2445 side = None 2446 
2447 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2448 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2449 2450 if not skip_join_token and not join and not outer_apply and not cross_apply: 2451 return None 2452 2453 if outer_apply: 2454 side = Token(TokenType.LEFT, "LEFT") 2455 2456 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2457 2458 if method: 2459 kwargs["method"] = method.text 2460 if side: 2461 kwargs["side"] = side.text 2462 if kind: 2463 kwargs["kind"] = kind.text 2464 if hint: 2465 kwargs["hint"] = hint 2466 2467 if self._match(TokenType.ON): 2468 kwargs["on"] = self._parse_conjunction() 2469 elif self._match(TokenType.USING): 2470 kwargs["using"] = self._parse_wrapped_id_vars() 2471 elif not (kind and kind.token_type == TokenType.CROSS): 2472 index = self._index 2473 join = self._parse_join() 2474 2475 if join and self._match(TokenType.ON): 2476 kwargs["on"] = self._parse_conjunction() 2477 elif join and self._match(TokenType.USING): 2478 kwargs["using"] = self._parse_wrapped_id_vars() 2479 else: 2480 join = None 2481 self._retreat(index) 2482 2483 kwargs["this"].set("joins", [join] if join else None) 2484 2485 comments = [c for token in (method, side, kind) if token for c in token.comments] 2486 return self.expression(exp.Join, comments=comments, **kwargs) 2487 2488 def _parse_opclass(self) -> t.Optional[exp.Expression]: 2489 this = self._parse_conjunction() 2490 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 2491 return this 2492 2493 opclass = self._parse_var(any_token=True) 2494 if opclass: 2495 return self.expression(exp.Opclass, this=this, expression=opclass) 2496 2497 return this 2498 2499 def _parse_index( 2500 self, 2501 index: t.Optional[exp.Expression] = None, 2502 ) -> t.Optional[exp.Index]: 2503 if index: 2504 unique = None 2505 primary = None 2506 amp = None 2507 2508 self._match(TokenType.ON) 2509 self._match(TokenType.TABLE) # 
hive 2510 table = self._parse_table_parts(schema=True) 2511 else: 2512 unique = self._match(TokenType.UNIQUE) 2513 primary = self._match_text_seq("PRIMARY") 2514 amp = self._match_text_seq("AMP") 2515 2516 if not self._match(TokenType.INDEX): 2517 return None 2518 2519 index = self._parse_id_var() 2520 table = None 2521 2522 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 2523 2524 if self._match(TokenType.L_PAREN, advance=False): 2525 columns = self._parse_wrapped_csv(lambda: self._parse_ordered(self._parse_opclass)) 2526 else: 2527 columns = None 2528 2529 return self.expression( 2530 exp.Index, 2531 this=index, 2532 table=table, 2533 using=using, 2534 columns=columns, 2535 unique=unique, 2536 primary=primary, 2537 amp=amp, 2538 partition_by=self._parse_partition_by(), 2539 where=self._parse_where(), 2540 ) 2541 2542 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2543 hints: t.List[exp.Expression] = [] 2544 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2545 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2546 hints.append( 2547 self.expression( 2548 exp.WithTableHint, 2549 expressions=self._parse_csv( 2550 lambda: self._parse_function() or self._parse_var(any_token=True) 2551 ), 2552 ) 2553 ) 2554 self._match_r_paren() 2555 else: 2556 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2557 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2558 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2559 2560 self._match_texts({"INDEX", "KEY"}) 2561 if self._match(TokenType.FOR): 2562 hint.set("target", self._advance_any() and self._prev.text.upper()) 2563 2564 hint.set("expressions", self._parse_wrapped_id_vars()) 2565 hints.append(hint) 2566 2567 return hints or None 2568 2569 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2570 return ( 2571 (not schema and self._parse_function(optional_parens=False)) 
2572 or self._parse_id_var(any_token=False) 2573 or self._parse_string_as_identifier() 2574 or self._parse_placeholder() 2575 ) 2576 2577 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2578 catalog = None 2579 db = None 2580 table = self._parse_table_part(schema=schema) 2581 2582 while self._match(TokenType.DOT): 2583 if catalog: 2584 # This allows nesting the table in arbitrarily many dot expressions if needed 2585 table = self.expression( 2586 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2587 ) 2588 else: 2589 catalog = db 2590 db = table 2591 table = self._parse_table_part(schema=schema) 2592 2593 if not table: 2594 self.raise_error(f"Expected table name but got {self._curr}") 2595 2596 return self.expression( 2597 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2598 ) 2599 2600 def _parse_table( 2601 self, 2602 schema: bool = False, 2603 joins: bool = False, 2604 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2605 parse_bracket: bool = False, 2606 ) -> t.Optional[exp.Expression]: 2607 lateral = self._parse_lateral() 2608 if lateral: 2609 return lateral 2610 2611 unnest = self._parse_unnest() 2612 if unnest: 2613 return unnest 2614 2615 values = self._parse_derived_table_values() 2616 if values: 2617 return values 2618 2619 subquery = self._parse_select(table=True) 2620 if subquery: 2621 if not subquery.args.get("pivots"): 2622 subquery.set("pivots", self._parse_pivots()) 2623 return subquery 2624 2625 bracket = parse_bracket and self._parse_bracket(None) 2626 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2627 this = t.cast( 2628 exp.Expression, bracket or self._parse_bracket(self._parse_table_parts(schema=schema)) 2629 ) 2630 2631 if schema: 2632 return self._parse_schema(this=this) 2633 2634 version = self._parse_version() 2635 2636 if version: 2637 this.set("version", version) 2638 2639 if self.ALIAS_POST_TABLESAMPLE: 2640 table_sample = 
self._parse_table_sample() 2641 2642 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2643 if alias: 2644 this.set("alias", alias) 2645 2646 if self._match_text_seq("AT"): 2647 this.set("index", self._parse_id_var()) 2648 2649 this.set("hints", self._parse_table_hints()) 2650 2651 if not this.args.get("pivots"): 2652 this.set("pivots", self._parse_pivots()) 2653 2654 if not self.ALIAS_POST_TABLESAMPLE: 2655 table_sample = self._parse_table_sample() 2656 2657 if table_sample: 2658 table_sample.set("this", this) 2659 this = table_sample 2660 2661 if joins: 2662 for join in iter(self._parse_join, None): 2663 this.append("joins", join) 2664 2665 return this 2666 2667 def _parse_version(self) -> t.Optional[exp.Version]: 2668 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 2669 this = "TIMESTAMP" 2670 elif self._match(TokenType.VERSION_SNAPSHOT): 2671 this = "VERSION" 2672 else: 2673 return None 2674 2675 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 2676 kind = self._prev.text.upper() 2677 start = self._parse_bitwise() 2678 self._match_texts(("TO", "AND")) 2679 end = self._parse_bitwise() 2680 expression: t.Optional[exp.Expression] = self.expression( 2681 exp.Tuple, expressions=[start, end] 2682 ) 2683 elif self._match_text_seq("CONTAINED", "IN"): 2684 kind = "CONTAINED IN" 2685 expression = self.expression( 2686 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 2687 ) 2688 elif self._match(TokenType.ALL): 2689 kind = "ALL" 2690 expression = None 2691 else: 2692 self._match_text_seq("AS", "OF") 2693 kind = "AS OF" 2694 expression = self._parse_type() 2695 2696 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 2697 2698 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2699 if not self._match(TokenType.UNNEST): 2700 return None 2701 2702 expressions = self._parse_wrapped_csv(self._parse_type) 2703 offset = self._match_pair(TokenType.WITH, 
TokenType.ORDINALITY) 2704 2705 alias = self._parse_table_alias() if with_alias else None 2706 2707 if alias: 2708 if self.UNNEST_COLUMN_ONLY: 2709 if alias.args.get("columns"): 2710 self.raise_error("Unexpected extra column alias in unnest.") 2711 2712 alias.set("columns", [alias.this]) 2713 alias.set("this", None) 2714 2715 columns = alias.args.get("columns") or [] 2716 if offset and len(expressions) < len(columns): 2717 offset = columns.pop() 2718 2719 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 2720 self._match(TokenType.ALIAS) 2721 offset = self._parse_id_var( 2722 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 2723 ) or exp.to_identifier("offset") 2724 2725 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 2726 2727 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2728 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2729 if not is_derived and not self._match(TokenType.VALUES): 2730 return None 2731 2732 expressions = self._parse_csv(self._parse_value) 2733 alias = self._parse_table_alias() 2734 2735 if is_derived: 2736 self._match_r_paren() 2737 2738 return self.expression( 2739 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2740 ) 2741 2742 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2743 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2744 as_modifier and self._match_text_seq("USING", "SAMPLE") 2745 ): 2746 return None 2747 2748 bucket_numerator = None 2749 bucket_denominator = None 2750 bucket_field = None 2751 percent = None 2752 rows = None 2753 size = None 2754 seed = None 2755 2756 kind = ( 2757 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2758 ) 2759 method = self._parse_var(tokens=(TokenType.ROW,)) 2760 2761 matched_l_paren = self._match(TokenType.L_PAREN) 2762 2763 if self.TABLESAMPLE_CSV: 2764 num = None 2765 expressions 
= self._parse_csv(self._parse_primary) 2766 else: 2767 expressions = None 2768 num = ( 2769 self._parse_factor() 2770 if self._match(TokenType.NUMBER, advance=False) 2771 else self._parse_primary() 2772 ) 2773 2774 if self._match_text_seq("BUCKET"): 2775 bucket_numerator = self._parse_number() 2776 self._match_text_seq("OUT", "OF") 2777 bucket_denominator = bucket_denominator = self._parse_number() 2778 self._match(TokenType.ON) 2779 bucket_field = self._parse_field() 2780 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2781 percent = num 2782 elif self._match(TokenType.ROWS): 2783 rows = num 2784 elif num: 2785 size = num 2786 2787 if matched_l_paren: 2788 self._match_r_paren() 2789 2790 if self._match(TokenType.L_PAREN): 2791 method = self._parse_var() 2792 seed = self._match(TokenType.COMMA) and self._parse_number() 2793 self._match_r_paren() 2794 elif self._match_texts(("SEED", "REPEATABLE")): 2795 seed = self._parse_wrapped(self._parse_number) 2796 2797 return self.expression( 2798 exp.TableSample, 2799 expressions=expressions, 2800 method=method, 2801 bucket_numerator=bucket_numerator, 2802 bucket_denominator=bucket_denominator, 2803 bucket_field=bucket_field, 2804 percent=percent, 2805 rows=rows, 2806 size=size, 2807 seed=seed, 2808 kind=kind, 2809 ) 2810 2811 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2812 return list(iter(self._parse_pivot, None)) or None 2813 2814 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2815 return list(iter(self._parse_join, None)) or None 2816 2817 # https://duckdb.org/docs/sql/statements/pivot 2818 def _parse_simplified_pivot(self) -> exp.Pivot: 2819 def _parse_on() -> t.Optional[exp.Expression]: 2820 this = self._parse_bitwise() 2821 return self._parse_in(this) if self._match(TokenType.IN) else this 2822 2823 this = self._parse_table() 2824 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2825 using = self._match(TokenType.USING) and self._parse_csv( 2826 lambda: 
self._parse_alias(self._parse_function()) 2827 ) 2828 group = self._parse_group() 2829 return self.expression( 2830 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2831 ) 2832 2833 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2834 index = self._index 2835 include_nulls = None 2836 2837 if self._match(TokenType.PIVOT): 2838 unpivot = False 2839 elif self._match(TokenType.UNPIVOT): 2840 unpivot = True 2841 2842 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 2843 if self._match_text_seq("INCLUDE", "NULLS"): 2844 include_nulls = True 2845 elif self._match_text_seq("EXCLUDE", "NULLS"): 2846 include_nulls = False 2847 else: 2848 return None 2849 2850 expressions = [] 2851 field = None 2852 2853 if not self._match(TokenType.L_PAREN): 2854 self._retreat(index) 2855 return None 2856 2857 if unpivot: 2858 expressions = self._parse_csv(self._parse_column) 2859 else: 2860 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2861 2862 if not expressions: 2863 self.raise_error("Failed to parse PIVOT's aggregation list") 2864 2865 if not self._match(TokenType.FOR): 2866 self.raise_error("Expecting FOR") 2867 2868 value = self._parse_column() 2869 2870 if not self._match(TokenType.IN): 2871 self.raise_error("Expecting IN") 2872 2873 field = self._parse_in(value, alias=True) 2874 2875 self._match_r_paren() 2876 2877 pivot = self.expression( 2878 exp.Pivot, 2879 expressions=expressions, 2880 field=field, 2881 unpivot=unpivot, 2882 include_nulls=include_nulls, 2883 ) 2884 2885 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2886 pivot.set("alias", self._parse_table_alias()) 2887 2888 if not unpivot: 2889 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2890 2891 columns: t.List[exp.Expression] = [] 2892 for fld in pivot.args["field"].expressions: 2893 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else 
fld.alias_or_name 2894 for name in names: 2895 if self.PREFIXED_PIVOT_COLUMNS: 2896 name = f"{name}_{field_name}" if name else field_name 2897 else: 2898 name = f"{field_name}_{name}" if name else field_name 2899 2900 columns.append(exp.to_identifier(name)) 2901 2902 pivot.set("columns", columns) 2903 2904 return pivot 2905 2906 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2907 return [agg.alias for agg in aggregations] 2908 2909 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2910 if not skip_where_token and not self._match(TokenType.WHERE): 2911 return None 2912 2913 return self.expression( 2914 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2915 ) 2916 2917 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2918 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2919 return None 2920 2921 elements = defaultdict(list) 2922 2923 if self._match(TokenType.ALL): 2924 return self.expression(exp.Group, all=True) 2925 2926 while True: 2927 expressions = self._parse_csv(self._parse_conjunction) 2928 if expressions: 2929 elements["expressions"].extend(expressions) 2930 2931 grouping_sets = self._parse_grouping_sets() 2932 if grouping_sets: 2933 elements["grouping_sets"].extend(grouping_sets) 2934 2935 rollup = None 2936 cube = None 2937 totals = None 2938 2939 with_ = self._match(TokenType.WITH) 2940 if self._match(TokenType.ROLLUP): 2941 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2942 elements["rollup"].extend(ensure_list(rollup)) 2943 2944 if self._match(TokenType.CUBE): 2945 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2946 elements["cube"].extend(ensure_list(cube)) 2947 2948 if self._match_text_seq("TOTALS"): 2949 totals = True 2950 elements["totals"] = True # type: ignore 2951 2952 if not (grouping_sets or rollup or cube or totals): 2953 break 2954 2955 return self.expression(exp.Group, 
**elements) # type: ignore 2956 2957 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 2958 if not self._match(TokenType.GROUPING_SETS): 2959 return None 2960 2961 return self._parse_wrapped_csv(self._parse_grouping_set) 2962 2963 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2964 if self._match(TokenType.L_PAREN): 2965 grouping_set = self._parse_csv(self._parse_column) 2966 self._match_r_paren() 2967 return self.expression(exp.Tuple, expressions=grouping_set) 2968 2969 return self._parse_column() 2970 2971 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2972 if not skip_having_token and not self._match(TokenType.HAVING): 2973 return None 2974 return self.expression(exp.Having, this=self._parse_conjunction()) 2975 2976 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2977 if not self._match(TokenType.QUALIFY): 2978 return None 2979 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2980 2981 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 2982 if skip_start_token: 2983 start = None 2984 elif self._match(TokenType.START_WITH): 2985 start = self._parse_conjunction() 2986 else: 2987 return None 2988 2989 self._match(TokenType.CONNECT_BY) 2990 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 2991 exp.Prior, this=self._parse_bitwise() 2992 ) 2993 connect = self._parse_conjunction() 2994 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 2995 2996 if not start and self._match(TokenType.START_WITH): 2997 start = self._parse_conjunction() 2998 2999 return self.expression(exp.Connect, start=start, connect=connect) 3000 3001 def _parse_order( 3002 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3003 ) -> t.Optional[exp.Expression]: 3004 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3005 return this 3006 3007 return self.expression( 3008 exp.Order, this=this, 
expressions=self._parse_csv(self._parse_ordered) 3009 ) 3010 3011 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3012 if not self._match(token): 3013 return None 3014 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3015 3016 def _parse_ordered(self, parse_method: t.Optional[t.Callable] = None) -> exp.Ordered: 3017 this = parse_method() if parse_method else self._parse_conjunction() 3018 3019 asc = self._match(TokenType.ASC) 3020 desc = self._match(TokenType.DESC) or (asc and False) 3021 3022 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3023 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3024 3025 nulls_first = is_nulls_first or False 3026 explicitly_null_ordered = is_nulls_first or is_nulls_last 3027 3028 if ( 3029 not explicitly_null_ordered 3030 and ( 3031 (not desc and self.NULL_ORDERING == "nulls_are_small") 3032 or (desc and self.NULL_ORDERING != "nulls_are_small") 3033 ) 3034 and self.NULL_ORDERING != "nulls_are_last" 3035 ): 3036 nulls_first = True 3037 3038 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 3039 3040 def _parse_limit( 3041 self, this: t.Optional[exp.Expression] = None, top: bool = False 3042 ) -> t.Optional[exp.Expression]: 3043 if self._match(TokenType.TOP if top else TokenType.LIMIT): 3044 comments = self._prev_comments 3045 if top: 3046 limit_paren = self._match(TokenType.L_PAREN) 3047 expression = self._parse_number() 3048 3049 if limit_paren: 3050 self._match_r_paren() 3051 else: 3052 expression = self._parse_term() 3053 3054 if self._match(TokenType.COMMA): 3055 offset = expression 3056 expression = self._parse_term() 3057 else: 3058 offset = None 3059 3060 limit_exp = self.expression( 3061 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 3062 ) 3063 3064 return limit_exp 3065 3066 if self._match(TokenType.FETCH): 3067 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3068 
direction = self._prev.text if direction else "FIRST" 3069 3070 count = self._parse_field(tokens=self.FETCH_TOKENS) 3071 percent = self._match(TokenType.PERCENT) 3072 3073 self._match_set((TokenType.ROW, TokenType.ROWS)) 3074 3075 only = self._match_text_seq("ONLY") 3076 with_ties = self._match_text_seq("WITH", "TIES") 3077 3078 if only and with_ties: 3079 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3080 3081 return self.expression( 3082 exp.Fetch, 3083 direction=direction, 3084 count=count, 3085 percent=percent, 3086 with_ties=with_ties, 3087 ) 3088 3089 return this 3090 3091 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3092 if not self._match(TokenType.OFFSET): 3093 return this 3094 3095 count = self._parse_term() 3096 self._match_set((TokenType.ROW, TokenType.ROWS)) 3097 return self.expression(exp.Offset, this=this, expression=count) 3098 3099 def _parse_locks(self) -> t.List[exp.Lock]: 3100 locks = [] 3101 while True: 3102 if self._match_text_seq("FOR", "UPDATE"): 3103 update = True 3104 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3105 "LOCK", "IN", "SHARE", "MODE" 3106 ): 3107 update = False 3108 else: 3109 break 3110 3111 expressions = None 3112 if self._match_text_seq("OF"): 3113 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3114 3115 wait: t.Optional[bool | exp.Expression] = None 3116 if self._match_text_seq("NOWAIT"): 3117 wait = True 3118 elif self._match_text_seq("WAIT"): 3119 wait = self._parse_primary() 3120 elif self._match_text_seq("SKIP", "LOCKED"): 3121 wait = False 3122 3123 locks.append( 3124 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3125 ) 3126 3127 return locks 3128 3129 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3130 if not self._match_set(self.SET_OPERATIONS): 3131 return this 3132 3133 token_type = self._prev.token_type 3134 
3135 if token_type == TokenType.UNION: 3136 expression = exp.Union 3137 elif token_type == TokenType.EXCEPT: 3138 expression = exp.Except 3139 else: 3140 expression = exp.Intersect 3141 3142 return self.expression( 3143 expression, 3144 this=this, 3145 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 3146 by_name=self._match_text_seq("BY", "NAME"), 3147 expression=self._parse_set_operations(self._parse_select(nested=True)), 3148 ) 3149 3150 def _parse_expression(self) -> t.Optional[exp.Expression]: 3151 return self._parse_alias(self._parse_conjunction()) 3152 3153 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3154 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3155 3156 def _parse_equality(self) -> t.Optional[exp.Expression]: 3157 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3158 3159 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3160 return self._parse_tokens(self._parse_range, self.COMPARISON) 3161 3162 def _parse_range(self) -> t.Optional[exp.Expression]: 3163 this = self._parse_bitwise() 3164 negate = self._match(TokenType.NOT) 3165 3166 if self._match_set(self.RANGE_PARSERS): 3167 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3168 if not expression: 3169 return this 3170 3171 this = expression 3172 elif self._match(TokenType.ISNULL): 3173 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3174 3175 # Postgres supports ISNULL and NOTNULL for conditions. 
3176 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3177 if self._match(TokenType.NOTNULL): 3178 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3179 this = self.expression(exp.Not, this=this) 3180 3181 if negate: 3182 this = self.expression(exp.Not, this=this) 3183 3184 if self._match(TokenType.IS): 3185 this = self._parse_is(this) 3186 3187 return this 3188 3189 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3190 index = self._index - 1 3191 negate = self._match(TokenType.NOT) 3192 3193 if self._match_text_seq("DISTINCT", "FROM"): 3194 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3195 return self.expression(klass, this=this, expression=self._parse_conjunction()) 3196 3197 expression = self._parse_null() or self._parse_boolean() 3198 if not expression: 3199 self._retreat(index) 3200 return None 3201 3202 this = self.expression(exp.Is, this=this, expression=expression) 3203 return self.expression(exp.Not, this=this) if negate else this 3204 3205 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3206 unnest = self._parse_unnest(with_alias=False) 3207 if unnest: 3208 this = self.expression(exp.In, this=this, unnest=unnest) 3209 elif self._match(TokenType.L_PAREN): 3210 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3211 3212 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 3213 this = self.expression(exp.In, this=this, query=expressions[0]) 3214 else: 3215 this = self.expression(exp.In, this=this, expressions=expressions) 3216 3217 self._match_r_paren(this) 3218 else: 3219 this = self.expression(exp.In, this=this, field=self._parse_field()) 3220 3221 return this 3222 3223 def _parse_between(self, this: exp.Expression) -> exp.Between: 3224 low = self._parse_bitwise() 3225 self._match(TokenType.AND) 3226 high = self._parse_bitwise() 3227 return self.expression(exp.Between, this=this, low=low, 
high=high) 3228 3229 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3230 if not self._match(TokenType.ESCAPE): 3231 return this 3232 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3233 3234 def _parse_interval(self) -> t.Optional[exp.Interval]: 3235 index = self._index 3236 3237 if not self._match(TokenType.INTERVAL): 3238 return None 3239 3240 if self._match(TokenType.STRING, advance=False): 3241 this = self._parse_primary() 3242 else: 3243 this = self._parse_term() 3244 3245 if not this: 3246 self._retreat(index) 3247 return None 3248 3249 unit = self._parse_function() or self._parse_var(any_token=True) 3250 3251 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3252 # each INTERVAL expression into this canonical form so it's easy to transpile 3253 if this and this.is_number: 3254 this = exp.Literal.string(this.name) 3255 elif this and this.is_string: 3256 parts = this.name.split() 3257 3258 if len(parts) == 2: 3259 if unit: 3260 # This is not actually a unit, it's something else (e.g. 
a "window side") 3261 unit = None 3262 self._retreat(self._index - 1) 3263 3264 this = exp.Literal.string(parts[0]) 3265 unit = self.expression(exp.Var, this=parts[1]) 3266 3267 return self.expression(exp.Interval, this=this, unit=unit) 3268 3269 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3270 this = self._parse_term() 3271 3272 while True: 3273 if self._match_set(self.BITWISE): 3274 this = self.expression( 3275 self.BITWISE[self._prev.token_type], 3276 this=this, 3277 expression=self._parse_term(), 3278 ) 3279 elif self._match(TokenType.DQMARK): 3280 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3281 elif self._match_pair(TokenType.LT, TokenType.LT): 3282 this = self.expression( 3283 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3284 ) 3285 elif self._match_pair(TokenType.GT, TokenType.GT): 3286 this = self.expression( 3287 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3288 ) 3289 else: 3290 break 3291 3292 return this 3293 3294 def _parse_term(self) -> t.Optional[exp.Expression]: 3295 return self._parse_tokens(self._parse_factor, self.TERM) 3296 3297 def _parse_factor(self) -> t.Optional[exp.Expression]: 3298 return self._parse_tokens(self._parse_unary, self.FACTOR) 3299 3300 def _parse_unary(self) -> t.Optional[exp.Expression]: 3301 if self._match_set(self.UNARY_PARSERS): 3302 return self.UNARY_PARSERS[self._prev.token_type](self) 3303 return self._parse_at_time_zone(self._parse_type()) 3304 3305 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3306 interval = parse_interval and self._parse_interval() 3307 if interval: 3308 return interval 3309 3310 index = self._index 3311 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3312 this = self._parse_column() 3313 3314 if data_type: 3315 if isinstance(this, exp.Literal): 3316 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3317 if parser: 3318 return parser(self, this, 
data_type) 3319 return self.expression(exp.Cast, this=this, to=data_type) 3320 if not data_type.expressions: 3321 self._retreat(index) 3322 return self._parse_column() 3323 return self._parse_column_ops(data_type) 3324 3325 return this and self._parse_column_ops(this) 3326 3327 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 3328 this = self._parse_type() 3329 if not this: 3330 return None 3331 3332 return self.expression( 3333 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 3334 ) 3335 3336 def _parse_types( 3337 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 3338 ) -> t.Optional[exp.Expression]: 3339 index = self._index 3340 3341 prefix = self._match_text_seq("SYSUDTLIB", ".") 3342 3343 if not self._match_set(self.TYPE_TOKENS): 3344 identifier = allow_identifiers and self._parse_id_var( 3345 any_token=False, tokens=(TokenType.VAR,) 3346 ) 3347 3348 if identifier: 3349 tokens = self._tokenizer.tokenize(identifier.name) 3350 3351 if len(tokens) != 1: 3352 self.raise_error("Unexpected identifier", self._prev) 3353 3354 if tokens[0].token_type in self.TYPE_TOKENS: 3355 self._prev = tokens[0] 3356 elif self.SUPPORTS_USER_DEFINED_TYPES: 3357 type_name = identifier.name 3358 3359 while self._match(TokenType.DOT): 3360 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 3361 3362 return exp.DataType.build(type_name, udt=True) 3363 else: 3364 return None 3365 else: 3366 return None 3367 3368 type_token = self._prev.token_type 3369 3370 if type_token == TokenType.PSEUDO_TYPE: 3371 return self.expression(exp.PseudoType, this=self._prev.text) 3372 3373 if type_token == TokenType.OBJECT_IDENTIFIER: 3374 return self.expression(exp.ObjectIdentifier, this=self._prev.text) 3375 3376 nested = type_token in self.NESTED_TYPE_TOKENS 3377 is_struct = type_token in self.STRUCT_TYPE_TOKENS 3378 expressions = None 3379 maybe_func = False 3380 3381 if self._match(TokenType.L_PAREN): 3382 if 
is_struct: 3383 expressions = self._parse_csv(self._parse_struct_types) 3384 elif nested: 3385 expressions = self._parse_csv( 3386 lambda: self._parse_types( 3387 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3388 ) 3389 ) 3390 elif type_token in self.ENUM_TYPE_TOKENS: 3391 expressions = self._parse_csv(self._parse_equality) 3392 else: 3393 expressions = self._parse_csv(self._parse_type_size) 3394 3395 if not expressions or not self._match(TokenType.R_PAREN): 3396 self._retreat(index) 3397 return None 3398 3399 maybe_func = True 3400 3401 this: t.Optional[exp.Expression] = None 3402 values: t.Optional[t.List[exp.Expression]] = None 3403 3404 if nested and self._match(TokenType.LT): 3405 if is_struct: 3406 expressions = self._parse_csv(self._parse_struct_types) 3407 else: 3408 expressions = self._parse_csv( 3409 lambda: self._parse_types( 3410 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 3411 ) 3412 ) 3413 3414 if not self._match(TokenType.GT): 3415 self.raise_error("Expecting >") 3416 3417 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3418 values = self._parse_csv(self._parse_conjunction) 3419 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3420 3421 if type_token in self.TIMESTAMPS: 3422 if self._match_text_seq("WITH", "TIME", "ZONE"): 3423 maybe_func = False 3424 tz_type = ( 3425 exp.DataType.Type.TIMETZ 3426 if type_token in self.TIMES 3427 else exp.DataType.Type.TIMESTAMPTZ 3428 ) 3429 this = exp.DataType(this=tz_type, expressions=expressions) 3430 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3431 maybe_func = False 3432 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3433 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3434 maybe_func = False 3435 elif type_token == TokenType.INTERVAL: 3436 unit = self._parse_var() 3437 3438 if self._match_text_seq("TO"): 3439 span = [exp.IntervalSpan(this=unit, 
expression=self._parse_var())] 3440 else: 3441 span = None 3442 3443 if span or not unit: 3444 this = self.expression( 3445 exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span 3446 ) 3447 else: 3448 this = self.expression(exp.Interval, unit=unit) 3449 3450 if maybe_func and check_func: 3451 index2 = self._index 3452 peek = self._parse_string() 3453 3454 if not peek: 3455 self._retreat(index) 3456 return None 3457 3458 self._retreat(index2) 3459 3460 if not this: 3461 if self._match_text_seq("UNSIGNED"): 3462 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 3463 if not unsigned_type_token: 3464 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 3465 3466 type_token = unsigned_type_token or type_token 3467 3468 this = exp.DataType( 3469 this=exp.DataType.Type[type_token.value], 3470 expressions=expressions, 3471 nested=nested, 3472 values=values, 3473 prefix=prefix, 3474 ) 3475 3476 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3477 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3478 3479 return this 3480 3481 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3482 this = self._parse_type(parse_interval=False) or self._parse_id_var() 3483 self._match(TokenType.COLON) 3484 return self._parse_column_def(this) 3485 3486 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3487 if not self._match_text_seq("AT", "TIME", "ZONE"): 3488 return this 3489 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3490 3491 def _parse_column(self) -> t.Optional[exp.Expression]: 3492 this = self._parse_field() 3493 if isinstance(this, exp.Identifier): 3494 this = self.expression(exp.Column, this=this) 3495 elif not this: 3496 return self._parse_bracket(this) 3497 return self._parse_column_ops(this) 3498 3499 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3500 
this = self._parse_bracket(this) 3501 3502 while self._match_set(self.COLUMN_OPERATORS): 3503 op_token = self._prev.token_type 3504 op = self.COLUMN_OPERATORS.get(op_token) 3505 3506 if op_token == TokenType.DCOLON: 3507 field = self._parse_types() 3508 if not field: 3509 self.raise_error("Expected type") 3510 elif op and self._curr: 3511 self._advance() 3512 value = self._prev.text 3513 field = ( 3514 exp.Literal.number(value) 3515 if self._prev.token_type == TokenType.NUMBER 3516 else exp.Literal.string(value) 3517 ) 3518 else: 3519 field = self._parse_field(anonymous_func=True, any_token=True) 3520 3521 if isinstance(field, exp.Func): 3522 # bigquery allows function calls like x.y.count(...) 3523 # SAFE.SUBSTR(...) 3524 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3525 this = self._replace_columns_with_dots(this) 3526 3527 if op: 3528 this = op(self, this, field) 3529 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3530 this = self.expression( 3531 exp.Column, 3532 this=field, 3533 table=this.this, 3534 db=this.args.get("table"), 3535 catalog=this.args.get("db"), 3536 ) 3537 else: 3538 this = self.expression(exp.Dot, this=this, expression=field) 3539 this = self._parse_bracket(this) 3540 return this 3541 3542 def _parse_primary(self) -> t.Optional[exp.Expression]: 3543 if self._match_set(self.PRIMARY_PARSERS): 3544 token_type = self._prev.token_type 3545 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3546 3547 if token_type == TokenType.STRING: 3548 expressions = [primary] 3549 while self._match(TokenType.STRING): 3550 expressions.append(exp.Literal.string(self._prev.text)) 3551 3552 if len(expressions) > 1: 3553 return self.expression(exp.Concat, expressions=expressions) 3554 3555 return primary 3556 3557 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3558 return exp.Literal.number(f"0.{self._prev.text}") 3559 3560 if self._match(TokenType.L_PAREN): 3561 comments 
= self._prev_comments 3562 query = self._parse_select() 3563 3564 if query: 3565 expressions = [query] 3566 else: 3567 expressions = self._parse_expressions() 3568 3569 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3570 3571 if isinstance(this, exp.Subqueryable): 3572 this = self._parse_set_operations( 3573 self._parse_subquery(this=this, parse_alias=False) 3574 ) 3575 elif len(expressions) > 1: 3576 this = self.expression(exp.Tuple, expressions=expressions) 3577 else: 3578 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3579 3580 if this: 3581 this.add_comments(comments) 3582 3583 self._match_r_paren(expression=this) 3584 return this 3585 3586 return None 3587 3588 def _parse_field( 3589 self, 3590 any_token: bool = False, 3591 tokens: t.Optional[t.Collection[TokenType]] = None, 3592 anonymous_func: bool = False, 3593 ) -> t.Optional[exp.Expression]: 3594 return ( 3595 self._parse_primary() 3596 or self._parse_function(anonymous=anonymous_func) 3597 or self._parse_id_var(any_token=any_token, tokens=tokens) 3598 ) 3599 3600 def _parse_function( 3601 self, 3602 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3603 anonymous: bool = False, 3604 optional_parens: bool = True, 3605 ) -> t.Optional[exp.Expression]: 3606 if not self._curr: 3607 return None 3608 3609 token_type = self._curr.token_type 3610 this = self._curr.text 3611 upper = this.upper() 3612 3613 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 3614 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 3615 self._advance() 3616 return parser(self) 3617 3618 if not self._next or self._next.token_type != TokenType.L_PAREN: 3619 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3620 self._advance() 3621 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3622 3623 return None 3624 3625 if token_type not in self.FUNC_TOKENS: 3626 return None 3627 3628 self._advance(2) 3629 3630 parser = 
self.FUNCTION_PARSERS.get(upper) 3631 if parser and not anonymous: 3632 this = parser(self) 3633 else: 3634 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3635 3636 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3637 this = self.expression(subquery_predicate, this=self._parse_select()) 3638 self._match_r_paren() 3639 return this 3640 3641 if functions is None: 3642 functions = self.FUNCTIONS 3643 3644 function = functions.get(upper) 3645 3646 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3647 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3648 3649 if function and not anonymous: 3650 func = self.validate_expression(function(args), args) 3651 if not self.NORMALIZE_FUNCTIONS: 3652 func.meta["name"] = this 3653 this = func 3654 else: 3655 this = self.expression(exp.Anonymous, this=this, expressions=args) 3656 3657 self._match_r_paren(this) 3658 return self._parse_window(this) 3659 3660 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3661 return self._parse_column_def(self._parse_id_var()) 3662 3663 def _parse_user_defined_function( 3664 self, kind: t.Optional[TokenType] = None 3665 ) -> t.Optional[exp.Expression]: 3666 this = self._parse_id_var() 3667 3668 while self._match(TokenType.DOT): 3669 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3670 3671 if not self._match(TokenType.L_PAREN): 3672 return this 3673 3674 expressions = self._parse_csv(self._parse_function_parameter) 3675 self._match_r_paren() 3676 return self.expression( 3677 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3678 ) 3679 3680 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3681 literal = self._parse_primary() 3682 if literal: 3683 return self.expression(exp.Introducer, this=token.text, expression=literal) 3684 3685 return self.expression(exp.Identifier, this=token.text) 3686 3687 def _parse_session_parameter(self) -> 
exp.SessionParameter: 3688 kind = None 3689 this = self._parse_id_var() or self._parse_primary() 3690 3691 if this and self._match(TokenType.DOT): 3692 kind = this.name 3693 this = self._parse_var() or self._parse_primary() 3694 3695 return self.expression(exp.SessionParameter, this=this, kind=kind) 3696 3697 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3698 index = self._index 3699 3700 if self._match(TokenType.L_PAREN): 3701 expressions = t.cast( 3702 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 3703 ) 3704 3705 if not self._match(TokenType.R_PAREN): 3706 self._retreat(index) 3707 else: 3708 expressions = [self._parse_id_var()] 3709 3710 if self._match_set(self.LAMBDAS): 3711 return self.LAMBDAS[self._prev.token_type](self, expressions) 3712 3713 self._retreat(index) 3714 3715 this: t.Optional[exp.Expression] 3716 3717 if self._match(TokenType.DISTINCT): 3718 this = self.expression( 3719 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3720 ) 3721 else: 3722 this = self._parse_select_or_expression(alias=alias) 3723 3724 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3725 3726 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3727 index = self._index 3728 3729 if not self.errors: 3730 try: 3731 if self._parse_select(nested=True): 3732 return this 3733 except ParseError: 3734 pass 3735 finally: 3736 self.errors.clear() 3737 self._retreat(index) 3738 3739 if not self._match(TokenType.L_PAREN): 3740 return this 3741 3742 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3743 3744 self._match_r_paren() 3745 return self.expression(exp.Schema, this=this, expressions=args) 3746 3747 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3748 return self._parse_column_def(self._parse_field(any_token=True)) 3749 3750 def _parse_column_def(self, this: t.Optional[exp.Expression]) 
-> t.Optional[exp.Expression]: 3751 # column defs are not really columns, they're identifiers 3752 if isinstance(this, exp.Column): 3753 this = this.this 3754 3755 kind = self._parse_types(schema=True) 3756 3757 if self._match_text_seq("FOR", "ORDINALITY"): 3758 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3759 3760 constraints: t.List[exp.Expression] = [] 3761 3762 if not kind and self._match(TokenType.ALIAS): 3763 constraints.append( 3764 self.expression( 3765 exp.ComputedColumnConstraint, 3766 this=self._parse_conjunction(), 3767 persisted=self._match_text_seq("PERSISTED"), 3768 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3769 ) 3770 ) 3771 3772 while True: 3773 constraint = self._parse_column_constraint() 3774 if not constraint: 3775 break 3776 constraints.append(constraint) 3777 3778 if not kind and not constraints: 3779 return this 3780 3781 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3782 3783 def _parse_auto_increment( 3784 self, 3785 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3786 start = None 3787 increment = None 3788 3789 if self._match(TokenType.L_PAREN, advance=False): 3790 args = self._parse_wrapped_csv(self._parse_bitwise) 3791 start = seq_get(args, 0) 3792 increment = seq_get(args, 1) 3793 elif self._match_text_seq("START"): 3794 start = self._parse_bitwise() 3795 self._match_text_seq("INCREMENT") 3796 increment = self._parse_bitwise() 3797 3798 if start and increment: 3799 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3800 3801 return exp.AutoIncrementColumnConstraint() 3802 3803 def _parse_compress(self) -> exp.CompressColumnConstraint: 3804 if self._match(TokenType.L_PAREN, advance=False): 3805 return self.expression( 3806 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3807 ) 3808 3809 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 
# NOTE(review): every function below takes ``self`` and relies on parser helpers
# (``self._match``, ``self.expression``, ...), so they are presumably methods of the
# ``Parser`` class whose header lies outside this excerpt -- confirm when re-indenting.


def _parse_generated_as_identity(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.ComputedColumnConstraint:
    """
    Parse ``{BY DEFAULT [ON NULL] | [ALWAYS]} [AS] [IDENTITY] [( options )]``.

    The constraint's ``this`` arg is False for BY DEFAULT and True otherwise. Inside the
    optional parentheses START WITH, INCREMENT BY, MINVALUE, MAXVALUE and [NO] CYCLE are
    recognized; when IDENTITY was not matched, the parenthesized content is additionally
    parsed as an expression and stored under ``expression``.
    """
    if self._match_text_seq("BY", "DEFAULT"):
        on_null = self._match_pair(TokenType.ON, TokenType.NULL)
        this = self.expression(
            exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
        )
    else:
        self._match_text_seq("ALWAYS")
        this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

    self._match(TokenType.ALIAS)
    identity = self._match_text_seq("IDENTITY")

    if self._match(TokenType.L_PAREN):
        if self._match(TokenType.START_WITH):
            this.set("start", self._parse_bitwise())
        if self._match_text_seq("INCREMENT", "BY"):
            this.set("increment", self._parse_bitwise())
        if self._match_text_seq("MINVALUE"):
            this.set("minvalue", self._parse_bitwise())
        if self._match_text_seq("MAXVALUE"):
            this.set("maxvalue", self._parse_bitwise())

        if self._match_text_seq("CYCLE"):
            this.set("cycle", True)
        elif self._match_text_seq("NO", "CYCLE"):
            this.set("cycle", False)

        if not identity:
            this.set("expression", self._parse_bitwise())

        self._match_r_paren()

    return this


def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
    """Parse an optional ``LENGTH`` keyword followed by a bitwise expression."""
    self._match_text_seq("LENGTH")
    return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())


def _parse_not_constraint(
    self,
) -> t.Optional[exp.Expression]:
    """
    Parse ``NULL``, ``CASESPECIFIC`` or ``FOR REPLICATION`` into the matching constraint.

    Presumably invoked after a leading NOT has been consumed by the caller (TODO confirm).
    Returns None when none of the three forms matches.
    """
    if self._match_text_seq("NULL"):
        return self.expression(exp.NotNullColumnConstraint)
    if self._match_text_seq("CASESPECIFIC"):
        return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
    if self._match_text_seq("FOR", "REPLICATION"):
        return self.expression(exp.NotForReplicationColumnConstraint)
    return None


def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse ``[CONSTRAINT <name>] <constraint>`` attached to a column.

    Returns a ColumnConstraint when the next token text is a key of CONSTRAINT_PARSERS;
    otherwise returns the bare constraint name (or None if there was no CONSTRAINT).
    """
    if self._match(TokenType.CONSTRAINT):
        this = self._parse_id_var()
    else:
        this = None

    if self._match_texts(self.CONSTRAINT_PARSERS):
        return self.expression(
            exp.ColumnConstraint,
            this=this,
            kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
        )

    return this


def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """
    Parse a schema-level constraint.

    Without a leading CONSTRAINT token this delegates to the unnamed-constraint parser,
    restricted to SCHEMA_UNNAMED_CONSTRAINTS. Otherwise it reads the constraint name and
    collects unnamed constraints / function calls until neither parses.
    """
    if not self._match(TokenType.CONSTRAINT):
        return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

    this = self._parse_id_var()
    expressions = []

    while True:
        constraint = self._parse_unnamed_constraint() or self._parse_function()
        if not constraint:
            break
        expressions.append(constraint)

    return self.expression(exp.Constraint, this=this, expressions=expressions)


def _parse_unnamed_constraint(
    self, constraints: t.Optional[t.Collection[str]] = None
) -> t.Optional[exp.Expression]:
    """
    Parse a constraint that has no ``CONSTRAINT <name>`` prefix.

    Args:
        constraints: keyword texts to accept; defaults to the keys of CONSTRAINT_PARSERS.

    Returns:
        The result of the registered parser, or None if no accepted keyword is next.
    """
    if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
        return None

    constraint = self._prev.text.upper()
    if constraint not in self.CONSTRAINT_PARSERS:
        self.raise_error(f"No parser found for schema constraint {constraint}.")

    return self.CONSTRAINT_PARSERS[constraint](self)


def _parse_unique(self) -> exp.UniqueColumnConstraint:
    """Parse ``[KEY] [<name>] [(<schema>)] [USING <index type>]`` of a UNIQUE constraint."""
    self._match_text_seq("KEY")
    return self.expression(
        exp.UniqueColumnConstraint,
        this=self._parse_schema(self._parse_id_var(any_token=False)),
        index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
    )


def _parse_key_constraint_options(self) -> t.List[str]:
    """
    Collect trailing key-constraint options as normalized strings.

    Recognizes ``ON <event> <action>`` as well as NOT ENFORCED, DEFERRABLE,
    INITIALLY DEFERRED, NORELY and MATCH FULL, stopping at the first unrecognized token
    or at the end of the token stream.
    """
    options = []
    while True:
        if not self._curr:
            break

        if self._match(TokenType.ON):
            action = None
            on = self._advance_any() and self._prev.text

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match_text_seq("CASCADE"):
                action = "CASCADE"
            elif self._match_text_seq("RESTRICT"):
                action = "RESTRICT"
            elif self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

            options.append(f"ON {on} {action}")
        elif self._match_text_seq("NOT", "ENFORCED"):
            options.append("NOT ENFORCED")
        elif self._match_text_seq("DEFERRABLE"):
            options.append("DEFERRABLE")
        elif self._match_text_seq("INITIALLY", "DEFERRED"):
            options.append("INITIALLY DEFERRED")
        elif self._match_text_seq("NORELY"):
            options.append("NORELY")
        elif self._match_text_seq("MATCH", "FULL"):
            options.append("MATCH FULL")
        else:
            break

    return options


def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
    """
    Parse ``REFERENCES <table>[(<columns>)] [<key constraint options>]``.

    Args:
        match: when True, the REFERENCES token must be present (None is returned
            otherwise); when False, parsing starts directly at the table.
    """
    if match and not self._match(TokenType.REFERENCES):
        return None

    this = self._parse_table(schema=True)
    options = self._parse_key_constraint_options()
    # ``expressions`` is always passed as None here; the table is parsed with schema=True.
    return self.expression(exp.Reference, this=this, expressions=None, options=options)


def _parse_foreign_key(self) -> exp.ForeignKey:
    """
    Parse ``(<columns>) [REFERENCES ...] [ON {DELETE | UPDATE} <action>]...``.

    Each ON clause is stored as a keyword argument named after the lower-cased event
    (``delete`` / ``update``) whose value is the upper-cased action text.
    """
    expressions = self._parse_wrapped_id_vars()
    reference = self._parse_references()
    options = {}

    while self._match(TokenType.ON):
        if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
            self.raise_error("Expected DELETE or UPDATE")

        kind = self._prev.text.lower()

        if self._match_text_seq("NO", "ACTION"):
            action = "NO ACTION"
        elif self._match(TokenType.SET):
            self._match_set((TokenType.NULL, TokenType.DEFAULT))
            action = "SET " + self._prev.text.upper()
        else:
            # Any other single token (e.g. CASCADE) is taken verbatim as the action.
            self._advance()
            action = self._prev.text.upper()

        options[kind] = action

    return self.expression(
        exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
    )


def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
    """Parse one element of a PRIMARY KEY column list (delegates to ``_parse_field``)."""
    return self._parse_field()


def _parse_primary_key(
    self, wrapped_optional: bool = False, in_props: bool = False
) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
    """
    Parse what follows PRIMARY KEY, either as a column constraint or a key definition.

    Args:
        wrapped_optional: forwarded as ``optional`` to the wrapped column-list parser.
        in_props: when True, always parse a column list even if no ``(`` is next.

    Returns:
        PrimaryKeyColumnConstraint (with ``desc``) when no column list follows and
        ``in_props`` is False; otherwise a PrimaryKey with its columns and options.
    """
    desc = (
        self._match_set((TokenType.ASC, TokenType.DESC))
        and self._prev.token_type == TokenType.DESC
    )

    if not in_props and not self._match(TokenType.L_PAREN, advance=False):
        return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

    expressions = self._parse_wrapped_csv(
        self._parse_primary_key_part, optional=wrapped_optional
    )
    options = self._parse_key_constraint_options()
    return self.expression(exp.PrimaryKey, expressions=expressions, options=options)


def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """
    Parse a ``[...]`` or ``{...}`` suffix applied to ``this``.

    ``{...}`` yields a Struct; ``[...]`` yields an Array when there is no operand or the
    operand is named ARRAY, and a Bracket (subscript, adjusted by INDEX_OFFSET) otherwise.
    Recurses so that chained brackets are consumed. Returns ``this`` unchanged when no
    opening bracket/brace is next.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type

    if self._match(TokenType.COLON):
        # Leading colon: a slice without a lower bound, e.g. x[:2]
        expressions: t.List[exp.Expression] = [
            self.expression(exp.Slice, expression=self._parse_conjunction())
        ]
    else:
        expressions = self._parse_csv(
            lambda: self._parse_slice(
                self._parse_alias(self._parse_conjunction(), explicit=True)
            )
        )

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(exp.Struct, expressions=expressions)
    elif not this or this.name.upper() == "ARRAY":
        this = self.expression(exp.Array, expressions=expressions)
    else:
        expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
        self.raise_error("Expected ]")
    elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
        self.raise_error("Expected }")

    self._add_comments(this)
    return self._parse_bracket(this)


def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap ``this`` in a Slice if a ``:`` follows; otherwise return it unchanged."""
    if self._match(TokenType.COLON):
        return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
    return this


def _parse_case(self) -> t.Optional[exp.Expression]:
    """
    Parse the body of a CASE expression: optional operand, WHEN/THEN arms, ELSE, END.

    Reports an error if END is missing. The result is passed through ``_parse_window``.
    """
    ifs = []
    default = None

    comments = self._prev_comments
    expression = self._parse_conjunction()

    while self._match(TokenType.WHEN):
        this = self._parse_conjunction()
        self._match(TokenType.THEN)
        then = self._parse_conjunction()
        ifs.append(self.expression(exp.If, this=this, true=then))

    if self._match(TokenType.ELSE):
        default = self._parse_conjunction()

    if not self._match(TokenType.END):
        self.raise_error("Expected END after CASE", self._prev)

    return self._parse_window(
        self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default)
    )


def _parse_if(self) -> t.Optional[exp.Expression]:
    """
    Parse either the function form ``IF(cond, true[, false])`` or the statement form
    ``IF cond [THEN] true [ELSE false] [END]``.

    In the statement form, if no condition can be parsed the parser retreats to the
    token before the current position and None is returned.
    """
    if self._match(TokenType.L_PAREN):
        args = self._parse_csv(self._parse_conjunction)
        this = self.validate_expression(exp.If.from_arg_list(args), args)
        self._match_r_paren()
    else:
        index = self._index - 1
        condition = self._parse_conjunction()

        if not condition:
            self._retreat(index)
            return None

        self._match(TokenType.THEN)
        true = self._parse_conjunction()
        false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
        self._match(TokenType.END)
        this = self.expression(exp.If, this=condition, true=true, false=false)

    return self._parse_window(this)


def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
    """
    Parse ``VALUE FOR <sequence> [OVER (<order>)]``.

    If ``VALUE FOR`` does not follow, the parser steps back one token and returns None.
    """
    if not self._match_text_seq("VALUE", "FOR"):
        self._retreat(self._index - 1)
        return None

    return self.expression(
        exp.NextValueFor,
        this=self._parse_column(),
        order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
    )


def _parse_extract(self) -> exp.Extract:
    """
    Parse the arguments of EXTRACT: ``<part> FROM <expr>`` or ``<part>, <expr>``.

    An error is reported when neither FROM nor a comma separates the two arguments.
    """
    this = self._parse_function() or self._parse_var() or self._parse_type()

    # FROM is tried first; the comma form is only attempted when FROM is absent.
    if not (self._match(TokenType.FROM) or self._match(TokenType.COMMA)):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())


def _parse_any_value(self) -> exp.AnyValue:
    """
    Parse the arguments of ANY_VALUE: ``<expr> [HAVING {MAX | MIN} <column>]``.

    Returns:
        AnyValue whose ``max`` arg is True for HAVING MAX, False for any other HAVING
        form, and None when there is no HAVING clause.
    """
    this = self._parse_lambda()
    is_max = None
    having = None

    if self._match(TokenType.HAVING):
        self._match_texts(("MAX", "MIN"))
        # Normalize the case before comparing, as the other keyword checks in this file
        # do after ``_match_texts`` (e.g. the TRIM position); otherwise a lower-case
        # ``max`` in the query would be treated as MIN.
        is_max = self._prev.text.upper() == "MAX"
        having = self._parse_column()

    return self.expression(exp.AnyValue, this=this, having=having, max=is_max)


def _parse_cast(self, strict: bool) -> exp.Expression:
    """
    Parse the arguments of CAST / TRY_CAST.

    Handles ``<expr> AS <type>``, ``<expr>, <string>`` (CastToStrType),
    ``CHAR CHARACTER SET <charset>`` targets, and ``FORMAT <fmt>`` clauses. A FORMAT
    cast to a temporal type is rewritten to StrToDate / StrToTime.

    Args:
        strict: build exp.Cast when True, exp.TryCast otherwise.
    """
    this = self._parse_conjunction()

    if not self._match(TokenType.ALIAS):
        if self._match(TokenType.COMMA):
            return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

        self.raise_error("Expected AS after CAST")

    fmt = None
    to = self._parse_types()

    if not to:
        self.raise_error("Expected TYPE after CAST")
    elif isinstance(to, exp.Identifier):
        # An identifier in type position is treated as a user-defined type.
        to = exp.DataType.build(to.name, udt=True)
    elif to.this == exp.DataType.Type.CHAR:
        if self._match(TokenType.CHARACTER_SET):
            to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
    elif self._match(TokenType.FORMAT):
        fmt_string = self._parse_string()
        fmt = self._parse_at_time_zone(fmt_string)

        if to.this in exp.DataType.TEMPORAL_TYPES:
            this = self.expression(
                exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                this=this,
                format=exp.Literal.string(
                    format_time(
                        fmt_string.this if fmt_string else "",
                        self.FORMAT_MAPPING or self.TIME_MAPPING,
                        self.FORMAT_TRIE or self.TIME_TRIE,
                    )
                ),
            )

            if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                this.set("zone", fmt.args["zone"])

            return this

    return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)


def _parse_concat(self) -> t.Optional[exp.Expression]:
    """
    Parse the arguments of CONCAT.

    Builds exp.Concat or exp.SafeConcat depending on STRICT_STRING_CONCAT; a
    single-argument call collapses to that argument.
    """
    args = self._parse_csv(self._parse_conjunction)
    if self.CONCAT_NULL_OUTPUTS_STRING:
        args = self._ensure_string_if_null(args)

    # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
    # we find such a call we replace it with its argument.
    if len(args) == 1:
        return args[0]

    return self.expression(
        exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
    )


def _parse_concat_ws(self) -> t.Optional[exp.Expression]:
    """
    Parse the arguments of CONCAT_WS (delimiter followed by values).

    With fewer than two arguments they are passed through untouched; otherwise the
    values (not the delimiter) go through ``_ensure_string_if_null`` when
    CONCAT_NULL_OUTPUTS_STRING is set.
    """
    args = self._parse_csv(self._parse_conjunction)
    if len(args) < 2:
        return self.expression(exp.ConcatWs, expressions=args)
    delim, *values = args
    if self.CONCAT_NULL_OUTPUTS_STRING:
        values = self._ensure_string_if_null(values)

    return self.expression(exp.ConcatWs, expressions=[delim] + values)


def _parse_string_agg(self) -> exp.Expression:
    """
    Parse the arguments of STRING_AGG into exp.GroupConcat.

    Supports an optional DISTINCT, a trailing ORDER BY / LIMIT inside the call, and a
    following ``WITHIN GROUP (ORDER BY ...)`` clause.
    """
    if self._match(TokenType.DISTINCT):
        args: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
        ]
        if self._match(TokenType.COMMA):
            args.extend(self._parse_csv(self._parse_conjunction))
    else:
        args = self._parse_csv(self._parse_conjunction)  # type: ignore

    index = self._index
    if not self._match(TokenType.R_PAREN) and args:
        # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
        # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
        args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
        return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

    # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
    # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
    # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
    if not self._match_text_seq("WITHIN", "GROUP"):
        self._retreat(index)
        return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

    self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
    order = self._parse_order(this=seq_get(args, 0))
    return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))


def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
    """
    Parse the arguments of CONVERT: ``<expr> USING <charset>`` or ``<expr>, <type>``.

    Args:
        strict: build exp.Cast when True, exp.TryCast otherwise.
    """
    this = self._parse_bitwise()

    if self._match(TokenType.USING):
        to: t.Optional[exp.Expression] = self.expression(
            exp.CharacterSet, this=self._parse_var()
        )
    elif self._match(TokenType.COMMA):
        to = self._parse_types()
    else:
        to = None

    return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)


def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
    """
    There are generally two variants of the DECODE function:

    - DECODE(bin, charset)
    - DECODE(expression, search, result [, search, result] ... [, default])

    The second variant will always be parsed into a CASE expression. Note that NULL
    needs special treatment, since we need to explicitly check for it with `IS NULL`,
    instead of relying on pattern matching.

    Returns None when the first argument or any search/result argument is missing.
    """
    args = self._parse_csv(self._parse_conjunction)

    if len(args) < 3:
        return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

    expression, *expressions = args
    if not expression:
        return None

    ifs = []
    for search, result in zip(expressions[::2], expressions[1::2]):
        if not search or not result:
            return None

        if isinstance(search, exp.Literal):
            ifs.append(
                exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
            )
        elif isinstance(search, exp.Null):
            ifs.append(
                exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
            )
        else:
            # Non-literal search value: it matches on equality or when both sides are NULL.
            cond = exp.or_(
                exp.EQ(this=expression.copy(), expression=search),
                exp.and_(
                    exp.Is(this=expression.copy(), expression=exp.Null()),
                    exp.Is(this=search.copy(), expression=exp.Null()),
                    copy=False,
                ),
                copy=False,
            )
            ifs.append(exp.If(this=cond, true=result))

    # An odd number of remaining arguments means the last one is the default.
    return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)


def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
    """
    Parse ``[KEY] <key> {: | ,} [VALUE] <value>``.

    Returns None only when neither a key nor a value could be parsed.
    """
    self._match_text_seq("KEY")
    key = self._parse_column()
    self._match_set((TokenType.COLON, TokenType.COMMA))
    self._match_text_seq("VALUE")
    value = self._parse_bitwise()

    if not key and not value:
        return None
    return self.expression(exp.JSONKeyValue, this=key, expression=value)


def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Wrap ``this`` in exp.FormatJson if ``FORMAT JSON`` follows; else return it as is."""
    if not this or not self._match_text_seq("FORMAT", "JSON"):
        return this

    return self.expression(exp.FormatJson, this=this)


def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
    """
    Parse the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL).

    Args:
        on: the keyword expected after ON.
        values: candidate keywords for X, tried in order.

    Returns:
        The matched clause as ``"<value> ON <on>"``, or None if no candidate matches.
    """
    for value in values:
        if self._match_text_seq(value, "ON", on):
            return f"{value} ON {on}"

    return None


def _parse_json_object(self) -> exp.JSONObject:
    """
    Parse the arguments of JSON_OBJECT.

    Accepts either ``*`` or a list of key/value pairs, followed by the optional
    ``{NULL | ABSENT} ON NULL``, ``{WITH | WITHOUT} UNIQUE [KEYS]``,
    ``RETURNING <type> [FORMAT JSON]`` and ``ENCODING <name>`` clauses.
    """
    star = self._parse_star()
    expressions = (
        [star]
        if star
        else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
    )
    null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

    unique_keys = None
    if self._match_text_seq("WITH", "UNIQUE"):
        unique_keys = True
    elif self._match_text_seq("WITHOUT", "UNIQUE"):
        unique_keys = False

    self._match_text_seq("KEYS")

    return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
        self._parse_type()
    )
    encoding = self._match_text_seq("ENCODING") and self._parse_var()

    return self.expression(
        exp.JSONObject,
        expressions=expressions,
        null_handling=null_handling,
        unique_keys=unique_keys,
        return_type=return_type,
        encoding=encoding,
    )


def _parse_logarithm(self) -> exp.Func:
    """
    Parse the arguments of LOG.

    With two or more arguments an exp.Log is built, reversing the arguments first when
    LOG_BASE_FIRST is falsy. With a single argument, exp.Ln or exp.Log is chosen
    according to LOG_DEFAULTS_TO_LN.
    """
    # Default argument order is base, expression
    args = self._parse_csv(self._parse_range)

    if len(args) > 1:
        if not self.LOG_BASE_FIRST:
            args.reverse()
        return exp.Log.from_arg_list(args)

    return self.expression(
        exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
    )


def _parse_match_against(self) -> exp.MatchAgainst:
    """
    Parse ``<columns>) AGAINST (<string> [<search modifier>]`` of MATCH ... AGAINST.

    The modifier is stored as one of the literal strings built below, or None.
    """
    expressions = self._parse_csv(self._parse_column)

    self._match_text_seq(")", "AGAINST", "(")

    this = self._parse_string()

    if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
        modifier = "IN NATURAL LANGUAGE MODE"
        if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = f"{modifier} WITH QUERY EXPANSION"
    elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
        modifier = "IN BOOLEAN MODE"
    elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
        modifier = "WITH QUERY EXPANSION"
    else:
        modifier = None

    return self.expression(
        exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
    )


# https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
def _parse_open_json(self) -> exp.OpenJSON:
    """
    Parse the arguments of OPENJSON: ``<expr> [, <path>]`` and, if the call is followed
    by ``) WITH (``, the list of column definitions.
    """
    this = self._parse_bitwise()
    path = self._match(TokenType.COMMA) and self._parse_string()

    def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
        """Parse one ``<name> <type> [<path>] [AS JSON]`` column definition."""
        this = self._parse_field(any_token=True)
        kind = self._parse_types()
        path = self._parse_string()
        as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

        return self.expression(
            exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
        )

    expressions = None
    if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
        self._match_l_paren()
        expressions = self._parse_csv(_parse_open_json_column_def)

    return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)


def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parse the arguments of POSITION-like functions.

    Supports ``<needle> IN <haystack>`` as well as the comma-separated form with an
    optional third ``position`` argument.

    Args:
        haystack_first: in the comma form, whether the haystack precedes the needle.
    """
    args = self._parse_csv(self._parse_bitwise)

    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
        )

    if haystack_first:
        haystack = seq_get(args, 0)
        needle = seq_get(args, 1)
    else:
        needle = seq_get(args, 0)
        haystack = seq_get(args, 1)

    return self.expression(
        exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
    )


def _parse_predict(self) -> exp.Predict:
    """Parse ``[MODEL] <model> [,] [TABLE] <table> [, <params>]`` of a PREDICT call."""
    self._match_text_seq("MODEL")
    this = self._parse_table()

    self._match(TokenType.COMMA)
    self._match_text_seq("TABLE")

    return self.expression(
        exp.Predict,
        this=this,
        expression=self._parse_table(),
        params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
    )


def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
    """Parse a comma-separated table list into a JoinHint named ``func_name`` (upper-cased)."""
    args = self._parse_csv(self._parse_table)
    return exp.JoinHint(this=func_name.upper(), expressions=args)


def _parse_substring(self) -> exp.Substring:
    """Parse the arguments of SUBSTRING, including the ``FROM ... FOR ...`` form."""
    # Postgres supports the form: substring(string [from int] [for int])
    # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

    args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

    if self._match(TokenType.FROM):
        args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

    return self.validate_expression(exp.Substring.from_arg_list(args), args)


def _parse_trim(self) -> exp.Trim:
    """
    Parse the arguments of TRIM:
    ``[<position>] <expr> [{FROM | ,} <expr>] [COLLATE <collation>]``.

    The two operands are swapped when they are separated by FROM or when
    TRIM_PATTERN_FIRST is set, so that ``this`` ends up being the second operand.
    """
    # https://www.w3resource.com/sql/character-functions/trim.php
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    position = None
    collation = None
    expression = None

    if self._match_texts(self.TRIM_TYPES):
        position = self._prev.text.upper()

    this = self._parse_bitwise()
    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
        expression = self._parse_bitwise()

        if invert_order:
            this, expression = expression, this

    if self._match(TokenType.COLLATE):
        collation = self._parse_bitwise()

    return self.expression(
        exp.Trim, this=this, position=position, expression=expression, collation=collation
    )


def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
    """Parse ``WINDOW <named window>[, ...]``; falsy when no WINDOW token is next."""
    return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)


def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parse one named window definition (an identifier handed to ``_parse_window``)."""
    return self._parse_window(self._parse_id_var(), alias=True)
4475 def _parse_respect_or_ignore_nulls( 4476 self, this: t.Optional[exp.Expression] 4477 ) -> t.Optional[exp.Expression]: 4478 if self._match_text_seq("IGNORE", "NULLS"): 4479 return self.expression(exp.IgnoreNulls, this=this) 4480 if self._match_text_seq("RESPECT", "NULLS"): 4481 return self.expression(exp.RespectNulls, this=this) 4482 return this 4483 4484 def _parse_window( 4485 self, this: t.Optional[exp.Expression], alias: bool = False 4486 ) -> t.Optional[exp.Expression]: 4487 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4488 self._match(TokenType.WHERE) 4489 this = self.expression( 4490 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4491 ) 4492 self._match_r_paren() 4493 4494 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4495 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4496 if self._match_text_seq("WITHIN", "GROUP"): 4497 order = self._parse_wrapped(self._parse_order) 4498 this = self.expression(exp.WithinGroup, this=this, expression=order) 4499 4500 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4501 # Some dialects choose to implement and some do not. 4502 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4503 4504 # There is some code above in _parse_lambda that handles 4505 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4506 4507 # The below changes handle 4508 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4509 4510 # Oracle allows both formats 4511 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4512 # and Snowflake chose to do the same for familiarity 4513 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4514 this = self._parse_respect_or_ignore_nulls(this) 4515 4516 # bigquery select from window x AS (partition by ...) 
4517 if alias: 4518 over = None 4519 self._match(TokenType.ALIAS) 4520 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4521 return this 4522 else: 4523 over = self._prev.text.upper() 4524 4525 if not self._match(TokenType.L_PAREN): 4526 return self.expression( 4527 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4528 ) 4529 4530 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4531 4532 first = self._match(TokenType.FIRST) 4533 if self._match_text_seq("LAST"): 4534 first = False 4535 4536 partition, order = self._parse_partition_and_order() 4537 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4538 4539 if kind: 4540 self._match(TokenType.BETWEEN) 4541 start = self._parse_window_spec() 4542 self._match(TokenType.AND) 4543 end = self._parse_window_spec() 4544 4545 spec = self.expression( 4546 exp.WindowSpec, 4547 kind=kind, 4548 start=start["value"], 4549 start_side=start["side"], 4550 end=end["value"], 4551 end_side=end["side"], 4552 ) 4553 else: 4554 spec = None 4555 4556 self._match_r_paren() 4557 4558 window = self.expression( 4559 exp.Window, 4560 this=this, 4561 partition_by=partition, 4562 order=order, 4563 spec=spec, 4564 alias=window_alias, 4565 over=over, 4566 first=first, 4567 ) 4568 4569 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
4570 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4571 return self._parse_window(window, alias=alias) 4572 4573 return window 4574 4575 def _parse_partition_and_order( 4576 self, 4577 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4578 return self._parse_partition_by(), self._parse_order() 4579 4580 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4581 self._match(TokenType.BETWEEN) 4582 4583 return { 4584 "value": ( 4585 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4586 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4587 or self._parse_bitwise() 4588 ), 4589 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4590 } 4591 4592 def _parse_alias( 4593 self, this: t.Optional[exp.Expression], explicit: bool = False 4594 ) -> t.Optional[exp.Expression]: 4595 any_token = self._match(TokenType.ALIAS) 4596 4597 if explicit and not any_token: 4598 return this 4599 4600 if self._match(TokenType.L_PAREN): 4601 aliases = self.expression( 4602 exp.Aliases, 4603 this=this, 4604 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4605 ) 4606 self._match_r_paren(aliases) 4607 return aliases 4608 4609 alias = self._parse_id_var(any_token) 4610 4611 if alias: 4612 return self.expression(exp.Alias, this=this, alias=alias) 4613 4614 return this 4615 4616 def _parse_id_var( 4617 self, 4618 any_token: bool = True, 4619 tokens: t.Optional[t.Collection[TokenType]] = None, 4620 ) -> t.Optional[exp.Expression]: 4621 identifier = self._parse_identifier() 4622 4623 if identifier: 4624 return identifier 4625 4626 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4627 quoted = self._prev.token_type == TokenType.STRING 4628 return exp.Identifier(this=self._prev.text, quoted=quoted) 4629 4630 return None 4631 4632 def _parse_string(self) -> t.Optional[exp.Expression]: 4633 if self._match(TokenType.STRING): 4634 return 
self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4635 return self._parse_placeholder() 4636 4637 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4638 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4639 4640 def _parse_number(self) -> t.Optional[exp.Expression]: 4641 if self._match(TokenType.NUMBER): 4642 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4643 return self._parse_placeholder() 4644 4645 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4646 if self._match(TokenType.IDENTIFIER): 4647 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4648 return self._parse_placeholder() 4649 4650 def _parse_var( 4651 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4652 ) -> t.Optional[exp.Expression]: 4653 if ( 4654 (any_token and self._advance_any()) 4655 or self._match(TokenType.VAR) 4656 or (self._match_set(tokens) if tokens else False) 4657 ): 4658 return self.expression(exp.Var, this=self._prev.text) 4659 return self._parse_placeholder() 4660 4661 def _advance_any(self) -> t.Optional[Token]: 4662 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4663 self._advance() 4664 return self._prev 4665 return None 4666 4667 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4668 return self._parse_var() or self._parse_string() 4669 4670 def _parse_null(self) -> t.Optional[exp.Expression]: 4671 if self._match_set(self.NULL_TOKENS): 4672 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4673 return self._parse_placeholder() 4674 4675 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4676 if self._match(TokenType.TRUE): 4677 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4678 if self._match(TokenType.FALSE): 4679 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4680 return self._parse_placeholder() 4681 4682 def _parse_star(self) -> 
t.Optional[exp.Expression]: 4683 if self._match(TokenType.STAR): 4684 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4685 return self._parse_placeholder() 4686 4687 def _parse_parameter(self) -> exp.Parameter: 4688 wrapped = self._match(TokenType.L_BRACE) 4689 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4690 self._match(TokenType.R_BRACE) 4691 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4692 4693 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4694 if self._match_set(self.PLACEHOLDER_PARSERS): 4695 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4696 if placeholder: 4697 return placeholder 4698 self._advance(-1) 4699 return None 4700 4701 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4702 if not self._match(TokenType.EXCEPT): 4703 return None 4704 if self._match(TokenType.L_PAREN, advance=False): 4705 return self._parse_wrapped_csv(self._parse_column) 4706 4707 except_column = self._parse_column() 4708 return [except_column] if except_column else None 4709 4710 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4711 if not self._match(TokenType.REPLACE): 4712 return None 4713 if self._match(TokenType.L_PAREN, advance=False): 4714 return self._parse_wrapped_csv(self._parse_expression) 4715 4716 replace_expression = self._parse_expression() 4717 return [replace_expression] if replace_expression else None 4718 4719 def _parse_csv( 4720 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4721 ) -> t.List[exp.Expression]: 4722 parse_result = parse_method() 4723 items = [parse_result] if parse_result is not None else [] 4724 4725 while self._match(sep): 4726 self._add_comments(parse_result) 4727 parse_result = parse_method() 4728 if parse_result is not None: 4729 items.append(parse_result) 4730 4731 return items 4732 4733 def _parse_tokens( 4734 self, parse_method: t.Callable, expressions: t.Dict 4735 ) -> 
t.Optional[exp.Expression]: 4736 this = parse_method() 4737 4738 while self._match_set(expressions): 4739 this = self.expression( 4740 expressions[self._prev.token_type], 4741 this=this, 4742 comments=self._prev_comments, 4743 expression=parse_method(), 4744 ) 4745 4746 return this 4747 4748 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4749 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4750 4751 def _parse_wrapped_csv( 4752 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4753 ) -> t.List[exp.Expression]: 4754 return self._parse_wrapped( 4755 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4756 ) 4757 4758 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4759 wrapped = self._match(TokenType.L_PAREN) 4760 if not wrapped and not optional: 4761 self.raise_error("Expecting (") 4762 parse_result = parse_method() 4763 if wrapped: 4764 self._match_r_paren() 4765 return parse_result 4766 4767 def _parse_expressions(self) -> t.List[exp.Expression]: 4768 return self._parse_csv(self._parse_expression) 4769 4770 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4771 return self._parse_select() or self._parse_set_operations( 4772 self._parse_expression() if alias else self._parse_conjunction() 4773 ) 4774 4775 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4776 return self._parse_query_modifiers( 4777 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4778 ) 4779 4780 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4781 this = None 4782 if self._match_texts(self.TRANSACTION_KIND): 4783 this = self._prev.text 4784 4785 self._match_texts({"TRANSACTION", "WORK"}) 4786 4787 modes = [] 4788 while True: 4789 mode = [] 4790 while self._match(TokenType.VAR): 4791 mode.append(self._prev.text) 4792 4793 if mode: 4794 modes.append(" 
".join(mode)) 4795 if not self._match(TokenType.COMMA): 4796 break 4797 4798 return self.expression(exp.Transaction, this=this, modes=modes) 4799 4800 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4801 chain = None 4802 savepoint = None 4803 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4804 4805 self._match_texts({"TRANSACTION", "WORK"}) 4806 4807 if self._match_text_seq("TO"): 4808 self._match_text_seq("SAVEPOINT") 4809 savepoint = self._parse_id_var() 4810 4811 if self._match(TokenType.AND): 4812 chain = not self._match_text_seq("NO") 4813 self._match_text_seq("CHAIN") 4814 4815 if is_rollback: 4816 return self.expression(exp.Rollback, savepoint=savepoint) 4817 4818 return self.expression(exp.Commit, chain=chain) 4819 4820 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4821 if not self._match_text_seq("ADD"): 4822 return None 4823 4824 self._match(TokenType.COLUMN) 4825 exists_column = self._parse_exists(not_=True) 4826 expression = self._parse_field_def() 4827 4828 if expression: 4829 expression.set("exists", exists_column) 4830 4831 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4832 if self._match_texts(("FIRST", "AFTER")): 4833 position = self._prev.text 4834 column_position = self.expression( 4835 exp.ColumnPosition, this=self._parse_column(), position=position 4836 ) 4837 expression.set("position", column_position) 4838 4839 return expression 4840 4841 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4842 drop = self._match(TokenType.DROP) and self._parse_drop() 4843 if drop and not isinstance(drop, exp.Command): 4844 drop.set("kind", drop.args.get("kind", "COLUMN")) 4845 return drop 4846 4847 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4848 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4849 return self.expression( 4850 exp.DropPartition, 
expressions=self._parse_csv(self._parse_partition), exists=exists 4851 ) 4852 4853 def _parse_add_constraint(self) -> exp.AddConstraint: 4854 this = None 4855 kind = self._prev.token_type 4856 4857 if kind == TokenType.CONSTRAINT: 4858 this = self._parse_id_var() 4859 4860 if self._match_text_seq("CHECK"): 4861 expression = self._parse_wrapped(self._parse_conjunction) 4862 enforced = self._match_text_seq("ENFORCED") 4863 4864 return self.expression( 4865 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4866 ) 4867 4868 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4869 expression = self._parse_foreign_key() 4870 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4871 expression = self._parse_primary_key() 4872 else: 4873 expression = None 4874 4875 return self.expression(exp.AddConstraint, this=this, expression=expression) 4876 4877 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4878 index = self._index - 1 4879 4880 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4881 return self._parse_csv(self._parse_add_constraint) 4882 4883 self._retreat(index) 4884 if not self.ALTER_TABLE_ADD_COLUMN_KEYWORD and self._match_text_seq("ADD"): 4885 return self._parse_csv(self._parse_field_def) 4886 4887 return self._parse_csv(self._parse_add_column) 4888 4889 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4890 self._match(TokenType.COLUMN) 4891 column = self._parse_field(any_token=True) 4892 4893 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4894 return self.expression(exp.AlterColumn, this=column, drop=True) 4895 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4896 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4897 4898 self._match_text_seq("SET", "DATA") 4899 return self.expression( 4900 exp.AlterColumn, 4901 this=column, 4902 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4903 collate=self._match(TokenType.COLLATE) 
and self._parse_term(), 4904 using=self._match(TokenType.USING) and self._parse_conjunction(), 4905 ) 4906 4907 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4908 index = self._index - 1 4909 4910 partition_exists = self._parse_exists() 4911 if self._match(TokenType.PARTITION, advance=False): 4912 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4913 4914 self._retreat(index) 4915 return self._parse_csv(self._parse_drop_column) 4916 4917 def _parse_alter_table_rename(self) -> exp.RenameTable: 4918 self._match_text_seq("TO") 4919 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4920 4921 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4922 start = self._prev 4923 4924 if not self._match(TokenType.TABLE): 4925 return self._parse_as_command(start) 4926 4927 exists = self._parse_exists() 4928 only = self._match_text_seq("ONLY") 4929 this = self._parse_table(schema=True) 4930 4931 if self._next: 4932 self._advance() 4933 4934 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4935 if parser: 4936 actions = ensure_list(parser(self)) 4937 4938 if not self._curr: 4939 return self.expression( 4940 exp.AlterTable, 4941 this=this, 4942 exists=exists, 4943 actions=actions, 4944 only=only, 4945 ) 4946 4947 return self._parse_as_command(start) 4948 4949 def _parse_merge(self) -> exp.Merge: 4950 self._match(TokenType.INTO) 4951 target = self._parse_table() 4952 4953 if target and self._match(TokenType.ALIAS, advance=False): 4954 target.set("alias", self._parse_table_alias()) 4955 4956 self._match(TokenType.USING) 4957 using = self._parse_table() 4958 4959 self._match(TokenType.ON) 4960 on = self._parse_conjunction() 4961 4962 whens = [] 4963 while self._match(TokenType.WHEN): 4964 matched = not self._match(TokenType.NOT) 4965 self._match_text_seq("MATCHED") 4966 source = ( 4967 False 4968 if self._match_text_seq("BY", "TARGET") 4969 else self._match_text_seq("BY", 
"SOURCE") 4970 ) 4971 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4972 4973 self._match(TokenType.THEN) 4974 4975 if self._match(TokenType.INSERT): 4976 _this = self._parse_star() 4977 if _this: 4978 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4979 else: 4980 then = self.expression( 4981 exp.Insert, 4982 this=self._parse_value(), 4983 expression=self._match(TokenType.VALUES) and self._parse_value(), 4984 ) 4985 elif self._match(TokenType.UPDATE): 4986 expressions = self._parse_star() 4987 if expressions: 4988 then = self.expression(exp.Update, expressions=expressions) 4989 else: 4990 then = self.expression( 4991 exp.Update, 4992 expressions=self._match(TokenType.SET) 4993 and self._parse_csv(self._parse_equality), 4994 ) 4995 elif self._match(TokenType.DELETE): 4996 then = self.expression(exp.Var, this=self._prev.text) 4997 else: 4998 then = None 4999 5000 whens.append( 5001 self.expression( 5002 exp.When, 5003 matched=matched, 5004 source=source, 5005 condition=condition, 5006 then=then, 5007 ) 5008 ) 5009 5010 return self.expression( 5011 exp.Merge, 5012 this=target, 5013 using=using, 5014 on=on, 5015 expressions=whens, 5016 ) 5017 5018 def _parse_show(self) -> t.Optional[exp.Expression]: 5019 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 5020 if parser: 5021 return parser(self) 5022 return self._parse_as_command(self._prev) 5023 5024 def _parse_set_item_assignment( 5025 self, kind: t.Optional[str] = None 5026 ) -> t.Optional[exp.Expression]: 5027 index = self._index 5028 5029 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 5030 return self._parse_set_transaction(global_=kind == "GLOBAL") 5031 5032 left = self._parse_primary() or self._parse_id_var() 5033 assignment_delimiter = self._match_texts(("=", "TO")) 5034 5035 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 5036 self._retreat(index) 5037 return None 5038 5039 
right = self._parse_statement() or self._parse_id_var() 5040 this = self.expression(exp.EQ, this=left, expression=right) 5041 5042 return self.expression(exp.SetItem, this=this, kind=kind) 5043 5044 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 5045 self._match_text_seq("TRANSACTION") 5046 characteristics = self._parse_csv( 5047 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 5048 ) 5049 return self.expression( 5050 exp.SetItem, 5051 expressions=characteristics, 5052 kind="TRANSACTION", 5053 **{"global": global_}, # type: ignore 5054 ) 5055 5056 def _parse_set_item(self) -> t.Optional[exp.Expression]: 5057 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 5058 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 5059 5060 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 5061 index = self._index 5062 set_ = self.expression( 5063 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 5064 ) 5065 5066 if self._curr: 5067 self._retreat(index) 5068 return self._parse_as_command(self._prev) 5069 5070 return set_ 5071 5072 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 5073 for option in options: 5074 if self._match_text_seq(*option.split(" ")): 5075 return exp.var(option) 5076 return None 5077 5078 def _parse_as_command(self, start: Token) -> exp.Command: 5079 while self._curr: 5080 self._advance() 5081 text = self._find_sql(start, self._prev) 5082 size = len(start.text) 5083 return exp.Command(this=text[:size], expression=text[size:]) 5084 5085 def _parse_dict_property(self, this: str) -> exp.DictProperty: 5086 settings = [] 5087 5088 self._match_l_paren() 5089 kind = self._parse_id_var() 5090 5091 if self._match(TokenType.L_PAREN): 5092 while True: 5093 key = self._parse_id_var() 5094 value = self._parse_primary() 5095 5096 if not key and value is None: 5097 break 5098 
settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 5099 self._match(TokenType.R_PAREN) 5100 5101 self._match_r_paren() 5102 5103 return self.expression( 5104 exp.DictProperty, 5105 this=this, 5106 kind=kind.this if kind else None, 5107 settings=settings, 5108 ) 5109 5110 def _parse_dict_range(self, this: str) -> exp.DictRange: 5111 self._match_l_paren() 5112 has_min = self._match_text_seq("MIN") 5113 if has_min: 5114 min = self._parse_var() or self._parse_primary() 5115 self._match_text_seq("MAX") 5116 max = self._parse_var() or self._parse_primary() 5117 else: 5118 max = self._parse_var() or self._parse_primary() 5119 min = exp.Literal.number(0) 5120 self._match_r_paren() 5121 return self.expression(exp.DictRange, this=this, min=min, max=max) 5122 5123 def _parse_comprehension(self, this: exp.Expression) -> t.Optional[exp.Comprehension]: 5124 index = self._index 5125 expression = self._parse_column() 5126 if not self._match(TokenType.IN): 5127 self._retreat(index - 1) 5128 return None 5129 iterator = self._parse_column() 5130 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 5131 return self.expression( 5132 exp.Comprehension, 5133 this=this, 5134 expression=expression, 5135 iterator=iterator, 5136 condition=condition, 5137 ) 5138 5139 def _find_parser( 5140 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 5141 ) -> t.Optional[t.Callable]: 5142 if not self._curr: 5143 return None 5144 5145 index = self._index 5146 this = [] 5147 while True: 5148 # The current token might be multiple words 5149 curr = self._curr.text.upper() 5150 key = curr.split(" ") 5151 this.append(curr) 5152 5153 self._advance() 5154 result, trie = in_trie(trie, key) 5155 if result == TrieResult.FAILED: 5156 break 5157 5158 if result == TrieResult.EXISTS: 5159 subparser = parsers[" ".join(this)] 5160 return subparser 5161 5162 self._retreat(index) 5163 return None 5164 5165 def _match(self, token_type, advance=True, expression=None): 
5166 if not self._curr: 5167 return None 5168 5169 if self._curr.token_type == token_type: 5170 if advance: 5171 self._advance() 5172 self._add_comments(expression) 5173 return True 5174 5175 return None 5176 5177 def _match_set(self, types, advance=True): 5178 if not self._curr: 5179 return None 5180 5181 if self._curr.token_type in types: 5182 if advance: 5183 self._advance() 5184 return True 5185 5186 return None 5187 5188 def _match_pair(self, token_type_a, token_type_b, advance=True): 5189 if not self._curr or not self._next: 5190 return None 5191 5192 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 5193 if advance: 5194 self._advance(2) 5195 return True 5196 5197 return None 5198 5199 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5200 if not self._match(TokenType.L_PAREN, expression=expression): 5201 self.raise_error("Expecting (") 5202 5203 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 5204 if not self._match(TokenType.R_PAREN, expression=expression): 5205 self.raise_error("Expecting )") 5206 5207 def _match_texts(self, texts, advance=True): 5208 if self._curr and self._curr.text.upper() in texts: 5209 if advance: 5210 self._advance() 5211 return True 5212 return False 5213 5214 def _match_text_seq(self, *texts, advance=True): 5215 index = self._index 5216 for text in texts: 5217 if self._curr and self._curr.text.upper() == text: 5218 self._advance() 5219 else: 5220 self._retreat(index) 5221 return False 5222 5223 if not advance: 5224 self._retreat(index) 5225 5226 return True 5227 5228 @t.overload 5229 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 5230 ... 5231 5232 @t.overload 5233 def _replace_columns_with_dots( 5234 self, this: t.Optional[exp.Expression] 5235 ) -> t.Optional[exp.Expression]: 5236 ... 
def _replace_columns_with_dots(self, this):
    """
    Rewrites Column nodes into Dot expressions (or their inner `this`), recursively.

    Args:
        this: The expression to rewrite.

    Returns:
        `this` itself when it is not a Column (a Dot is mutated in place through its
        children); otherwise a Dot of the column's table and inner `this`, or just the
        inner `this` when the column has no "table" arg.
    """
    if isinstance(this, exp.Dot):
        # A Dot is kept as-is; only its children are rewritten.
        exp.replace_children(this, self._replace_columns_with_dots)
    elif isinstance(this, exp.Column):
        # Children are rewritten first, then the column itself is unwrapped below.
        exp.replace_children(this, self._replace_columns_with_dots)
        table = this.args.get("table")
        this = (
            self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
        )

    return this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """
    Replaces columns that refer to lambda variables with a Dot or with their inner `this`.

    Args:
        node: The expression to rewrite; a falsy value is returned unchanged.
        lambda_variables: The names that identify lambda variables. A column matches when
            the name of its first part is in this set.

    Returns:
        The rewritten node. This is a different object than `node` only when `node` itself
        is a matching column that is not nested inside a Dot.
    """
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name in lambda_variables:
            # Table-qualified columns become a Dot, unqualified ones collapse to their `this`.
            dot_or_id = column.to_dot() if column.table else column.this
            parent = column.parent

            # Climb the chain of enclosing Dots and replace the outermost one.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                # The loop can only finish without `break` when the column's direct parent
                # is not a Dot, so the column itself is what gets replaced here.
                if column is node:
                    # The column is the root being rewritten, so the replacement is
                    # handed back to the caller by rebinding `node`.
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _ensure_string_if_null(self, values: t.List[exp.Expression]) -> t.List[exp.Expression]:
    """
    Wraps each value in COALESCE(CAST(value AS text), '').

    Args:
        values: The expressions to wrap; falsy entries are dropped.

    Returns:
        One COALESCE expression per truthy entry, in the original order.
    """
    return [
        exp.func("COALESCE", exp.cast(value, "text"), exp.Literal.string(""))
        for value in values
        if value
    ]
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    """
    Stores the parser configuration, instantiates its tokenizer and resets its state.

    Args:
        error_level: The desired error level. ErrorLevel.IMMEDIATE is used when none is given.
        error_message_context: The amount of context (in number of characters) to capture
            from a query string when displaying an error message.
        max_errors: Maximum number of error messages to include in a raised ParseError.
    """
    self.max_errors = max_errors
    self.error_message_context = error_message_context
    self.error_level = error_level if error_level else ErrorLevel.IMMEDIATE

    self._tokenizer = self.TOKENIZER_CLASS()
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into syntax trees, producing one tree per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # The statement parser is looked up on the class, i.e. it is passed along unbound.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that parses.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If the tokens could not be parsed into any of the expression types,
            including the case where no expression type was given at all.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to before trying the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # When no expression type was attempted there is no earlier error to chain from;
    # indexing `errors[-1]` unconditionally would surface an IndexError instead of the
    # intended ParseError.
    cause = errors[-1] if errors else None
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """
    Reports the recorded errors according to the configured error level.

    With ErrorLevel.WARN every recorded error is logged; with ErrorLevel.RAISE a single
    ParseError that combines the recorded errors is raised, provided there are any.
    Nothing happens for the other error levels.
    """
    level = self.error_level

    if level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
        return

    if level == ErrorLevel.RAISE and self.errors:
        message = concat_messages(self.errors, self.max_errors)
        raise ParseError(message, errors=merge_errors(self.errors))
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Builds a ParseError for `message` and either raises it or records it.

    Args:
        message: The description of the error.
        token: The token the error refers to. Falls back to the current token, then the
            previous one, then an empty string token.

    Raises:
        ParseError: If the error level is ErrorLevel.IMMEDIATE. Otherwise the error is
            appended to the list of recorded errors.
    """
    token = token or self._curr or self._prev or Token.string("")

    # The highlighted slice covers the offending token; `end` is made exclusive.
    highlight_from = token.start
    highlight_to = token.end + 1
    context_size = self.error_message_context

    before = self.sql[max(highlight_from - context_size, 0) : highlight_from]
    underlined = self.sql[highlight_from:highlight_to]
    after = self.sql[highlight_to : highlight_to + context_size]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {before}\033[4m{underlined}\033[0m{after}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=before,
        highlight=underlined,
        end_context=after,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Instantiates an Expression, attaches comments to it and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When it is
            missing or empty, the parser's own `_add_comments` is applied instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The result of validating the new expression.
    """
    node = exp_class(**kwargs)

    if comments:
        node.add_comments(comments)
    else:
        self._add_comments(node)

    return self.validate_expression(node)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Reports every error message the expression yields about itself, unless errors are ignored.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The same expression that was passed in.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.