sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_list, seq_get 10from sqlglot.time import format_time 11from sqlglot.tokens import Token, Tokenizer, TokenType 12from sqlglot.trie import TrieResult, in_trie, new_trie 13 14if t.TYPE_CHECKING: 15 from sqlglot._typing import E 16 17logger = logging.getLogger("sqlglot") 18 19 20def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 21 if len(args) == 1 and args[0].is_star: 22 return exp.StarMap(this=args[0]) 23 24 keys = [] 25 values = [] 26 for i in range(0, len(args), 2): 27 keys.append(args[i]) 28 values.append(args[i + 1]) 29 30 return exp.VarMap( 31 keys=exp.Array(expressions=keys), 32 values=exp.Array(expressions=values), 33 ) 34 35 36def parse_like(args: t.List) -> exp.Escape | exp.Like: 37 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 38 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 39 40 41def binary_range_parser( 42 expr_type: t.Type[exp.Expression], 43) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 44 return lambda self, this: self._parse_escape( 45 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 46 ) 47 48 49class _Parser(type): 50 def __new__(cls, clsname, bases, attrs): 51 klass = super().__new__(cls, clsname, bases, attrs) 52 53 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 54 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 55 56 return klass 57 58 59class Parser(metaclass=_Parser): 60 """ 61 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 62 63 Args: 64 error_level: The desired error level. 
65 Default: ErrorLevel.IMMEDIATE 66 error_message_context: Determines the amount of context to capture from a 67 query string when displaying the error message (in number of characters). 68 Default: 100 69 max_errors: Maximum number of error messages to include in a raised ParseError. 70 This is only relevant if error_level is ErrorLevel.RAISE. 71 Default: 3 72 """ 73 74 FUNCTIONS: t.Dict[str, t.Callable] = { 75 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 76 "DATE_TO_DATE_STR": lambda args: exp.Cast( 77 this=seq_get(args, 0), 78 to=exp.DataType(this=exp.DataType.Type.TEXT), 79 ), 80 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 81 "LIKE": parse_like, 82 "TIME_TO_TIME_STR": lambda args: exp.Cast( 83 this=seq_get(args, 0), 84 to=exp.DataType(this=exp.DataType.Type.TEXT), 85 ), 86 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 87 this=exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 start=exp.Literal.number(1), 92 length=exp.Literal.number(10), 93 ), 94 "VAR_MAP": parse_var_map, 95 } 96 97 NO_PAREN_FUNCTIONS = { 98 TokenType.CURRENT_DATE: exp.CurrentDate, 99 TokenType.CURRENT_DATETIME: exp.CurrentDate, 100 TokenType.CURRENT_TIME: exp.CurrentTime, 101 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 102 TokenType.CURRENT_USER: exp.CurrentUser, 103 } 104 105 NESTED_TYPE_TOKENS = { 106 TokenType.ARRAY, 107 TokenType.MAP, 108 TokenType.NULLABLE, 109 TokenType.STRUCT, 110 } 111 112 ENUM_TYPE_TOKENS = { 113 TokenType.ENUM, 114 } 115 116 TYPE_TOKENS = { 117 TokenType.BIT, 118 TokenType.BOOLEAN, 119 TokenType.TINYINT, 120 TokenType.UTINYINT, 121 TokenType.SMALLINT, 122 TokenType.USMALLINT, 123 TokenType.INT, 124 TokenType.UINT, 125 TokenType.BIGINT, 126 TokenType.UBIGINT, 127 TokenType.INT128, 128 TokenType.UINT128, 129 TokenType.INT256, 130 TokenType.UINT256, 131 TokenType.FLOAT, 132 TokenType.DOUBLE, 133 TokenType.CHAR, 134 TokenType.NCHAR, 135 
TokenType.VARCHAR, 136 TokenType.NVARCHAR, 137 TokenType.TEXT, 138 TokenType.MEDIUMTEXT, 139 TokenType.LONGTEXT, 140 TokenType.MEDIUMBLOB, 141 TokenType.LONGBLOB, 142 TokenType.BINARY, 143 TokenType.VARBINARY, 144 TokenType.JSON, 145 TokenType.JSONB, 146 TokenType.INTERVAL, 147 TokenType.TIME, 148 TokenType.TIMESTAMP, 149 TokenType.TIMESTAMPTZ, 150 TokenType.TIMESTAMPLTZ, 151 TokenType.DATETIME, 152 TokenType.DATETIME64, 153 TokenType.DATE, 154 TokenType.INT4RANGE, 155 TokenType.INT4MULTIRANGE, 156 TokenType.INT8RANGE, 157 TokenType.INT8MULTIRANGE, 158 TokenType.NUMRANGE, 159 TokenType.NUMMULTIRANGE, 160 TokenType.TSRANGE, 161 TokenType.TSMULTIRANGE, 162 TokenType.TSTZRANGE, 163 TokenType.TSTZMULTIRANGE, 164 TokenType.DATERANGE, 165 TokenType.DATEMULTIRANGE, 166 TokenType.DECIMAL, 167 TokenType.BIGDECIMAL, 168 TokenType.UUID, 169 TokenType.GEOGRAPHY, 170 TokenType.GEOMETRY, 171 TokenType.HLLSKETCH, 172 TokenType.HSTORE, 173 TokenType.PSEUDO_TYPE, 174 TokenType.SUPER, 175 TokenType.SERIAL, 176 TokenType.SMALLSERIAL, 177 TokenType.BIGSERIAL, 178 TokenType.XML, 179 TokenType.UNIQUEIDENTIFIER, 180 TokenType.USERDEFINED, 181 TokenType.MONEY, 182 TokenType.SMALLMONEY, 183 TokenType.ROWVERSION, 184 TokenType.IMAGE, 185 TokenType.VARIANT, 186 TokenType.OBJECT, 187 TokenType.INET, 188 TokenType.ENUM, 189 *NESTED_TYPE_TOKENS, 190 } 191 192 SUBQUERY_PREDICATES = { 193 TokenType.ANY: exp.Any, 194 TokenType.ALL: exp.All, 195 TokenType.EXISTS: exp.Exists, 196 TokenType.SOME: exp.Any, 197 } 198 199 RESERVED_KEYWORDS = { 200 *Tokenizer.SINGLE_TOKENS.values(), 201 TokenType.SELECT, 202 } 203 204 DB_CREATABLES = { 205 TokenType.DATABASE, 206 TokenType.SCHEMA, 207 TokenType.TABLE, 208 TokenType.VIEW, 209 TokenType.DICTIONARY, 210 } 211 212 CREATABLES = { 213 TokenType.COLUMN, 214 TokenType.FUNCTION, 215 TokenType.INDEX, 216 TokenType.PROCEDURE, 217 *DB_CREATABLES, 218 } 219 220 # Tokens that can represent identifiers 221 ID_VAR_TOKENS = { 222 TokenType.VAR, 223 TokenType.ANTI, 224 
TokenType.APPLY, 225 TokenType.ASC, 226 TokenType.AUTO_INCREMENT, 227 TokenType.BEGIN, 228 TokenType.CACHE, 229 TokenType.CASE, 230 TokenType.COLLATE, 231 TokenType.COMMAND, 232 TokenType.COMMENT, 233 TokenType.COMMIT, 234 TokenType.CONSTRAINT, 235 TokenType.DEFAULT, 236 TokenType.DELETE, 237 TokenType.DESC, 238 TokenType.DESCRIBE, 239 TokenType.DICTIONARY, 240 TokenType.DIV, 241 TokenType.END, 242 TokenType.EXECUTE, 243 TokenType.ESCAPE, 244 TokenType.FALSE, 245 TokenType.FIRST, 246 TokenType.FILTER, 247 TokenType.FORMAT, 248 TokenType.FULL, 249 TokenType.IF, 250 TokenType.IS, 251 TokenType.ISNULL, 252 TokenType.INTERVAL, 253 TokenType.KEEP, 254 TokenType.LEFT, 255 TokenType.LOAD, 256 TokenType.MERGE, 257 TokenType.NATURAL, 258 TokenType.NEXT, 259 TokenType.OFFSET, 260 TokenType.ORDINALITY, 261 TokenType.OVERWRITE, 262 TokenType.PARTITION, 263 TokenType.PERCENT, 264 TokenType.PIVOT, 265 TokenType.PRAGMA, 266 TokenType.RANGE, 267 TokenType.REFERENCES, 268 TokenType.RIGHT, 269 TokenType.ROW, 270 TokenType.ROWS, 271 TokenType.SEMI, 272 TokenType.SET, 273 TokenType.SETTINGS, 274 TokenType.SHOW, 275 TokenType.TEMPORARY, 276 TokenType.TOP, 277 TokenType.TRUE, 278 TokenType.UNIQUE, 279 TokenType.UNPIVOT, 280 TokenType.UPDATE, 281 TokenType.VOLATILE, 282 TokenType.WINDOW, 283 *CREATABLES, 284 *SUBQUERY_PREDICATES, 285 *TYPE_TOKENS, 286 *NO_PAREN_FUNCTIONS, 287 } 288 289 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 290 291 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 292 TokenType.APPLY, 293 TokenType.ASOF, 294 TokenType.FULL, 295 TokenType.LEFT, 296 TokenType.LOCK, 297 TokenType.NATURAL, 298 TokenType.OFFSET, 299 TokenType.RIGHT, 300 TokenType.WINDOW, 301 } 302 303 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 304 305 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 306 307 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 308 309 FUNC_TOKENS = { 310 TokenType.COMMAND, 311 TokenType.CURRENT_DATE, 312 TokenType.CURRENT_DATETIME, 313 
TokenType.CURRENT_TIMESTAMP, 314 TokenType.CURRENT_TIME, 315 TokenType.CURRENT_USER, 316 TokenType.FILTER, 317 TokenType.FIRST, 318 TokenType.FORMAT, 319 TokenType.GLOB, 320 TokenType.IDENTIFIER, 321 TokenType.INDEX, 322 TokenType.ISNULL, 323 TokenType.ILIKE, 324 TokenType.LIKE, 325 TokenType.MERGE, 326 TokenType.OFFSET, 327 TokenType.PRIMARY_KEY, 328 TokenType.RANGE, 329 TokenType.REPLACE, 330 TokenType.ROW, 331 TokenType.UNNEST, 332 TokenType.VAR, 333 TokenType.LEFT, 334 TokenType.RIGHT, 335 TokenType.DATE, 336 TokenType.DATETIME, 337 TokenType.TABLE, 338 TokenType.TIMESTAMP, 339 TokenType.TIMESTAMPTZ, 340 TokenType.WINDOW, 341 *TYPE_TOKENS, 342 *SUBQUERY_PREDICATES, 343 } 344 345 CONJUNCTION = { 346 TokenType.AND: exp.And, 347 TokenType.OR: exp.Or, 348 } 349 350 EQUALITY = { 351 TokenType.EQ: exp.EQ, 352 TokenType.NEQ: exp.NEQ, 353 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 354 } 355 356 COMPARISON = { 357 TokenType.GT: exp.GT, 358 TokenType.GTE: exp.GTE, 359 TokenType.LT: exp.LT, 360 TokenType.LTE: exp.LTE, 361 } 362 363 BITWISE = { 364 TokenType.AMP: exp.BitwiseAnd, 365 TokenType.CARET: exp.BitwiseXor, 366 TokenType.PIPE: exp.BitwiseOr, 367 TokenType.DPIPE: exp.DPipe, 368 } 369 370 TERM = { 371 TokenType.DASH: exp.Sub, 372 TokenType.PLUS: exp.Add, 373 TokenType.MOD: exp.Mod, 374 TokenType.COLLATE: exp.Collate, 375 } 376 377 FACTOR = { 378 TokenType.DIV: exp.IntDiv, 379 TokenType.LR_ARROW: exp.Distance, 380 TokenType.SLASH: exp.Div, 381 TokenType.STAR: exp.Mul, 382 } 383 384 TIMESTAMPS = { 385 TokenType.TIME, 386 TokenType.TIMESTAMP, 387 TokenType.TIMESTAMPTZ, 388 TokenType.TIMESTAMPLTZ, 389 } 390 391 SET_OPERATIONS = { 392 TokenType.UNION, 393 TokenType.INTERSECT, 394 TokenType.EXCEPT, 395 } 396 397 JOIN_METHODS = { 398 TokenType.NATURAL, 399 TokenType.ASOF, 400 } 401 402 JOIN_SIDES = { 403 TokenType.LEFT, 404 TokenType.RIGHT, 405 TokenType.FULL, 406 } 407 408 JOIN_KINDS = { 409 TokenType.INNER, 410 TokenType.OUTER, 411 TokenType.CROSS, 412 TokenType.SEMI, 413 
TokenType.ANTI, 414 } 415 416 JOIN_HINTS: t.Set[str] = set() 417 418 LAMBDAS = { 419 TokenType.ARROW: lambda self, expressions: self.expression( 420 exp.Lambda, 421 this=self._replace_lambda( 422 self._parse_conjunction(), 423 {node.name for node in expressions}, 424 ), 425 expressions=expressions, 426 ), 427 TokenType.FARROW: lambda self, expressions: self.expression( 428 exp.Kwarg, 429 this=exp.var(expressions[0].name), 430 expression=self._parse_conjunction(), 431 ), 432 } 433 434 COLUMN_OPERATORS = { 435 TokenType.DOT: None, 436 TokenType.DCOLON: lambda self, this, to: self.expression( 437 exp.Cast if self.STRICT_CAST else exp.TryCast, 438 this=this, 439 to=to, 440 ), 441 TokenType.ARROW: lambda self, this, path: self.expression( 442 exp.JSONExtract, 443 this=this, 444 expression=path, 445 ), 446 TokenType.DARROW: lambda self, this, path: self.expression( 447 exp.JSONExtractScalar, 448 this=this, 449 expression=path, 450 ), 451 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 452 exp.JSONBExtract, 453 this=this, 454 expression=path, 455 ), 456 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 457 exp.JSONBExtractScalar, 458 this=this, 459 expression=path, 460 ), 461 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 462 exp.JSONBContains, 463 this=this, 464 expression=key, 465 ), 466 } 467 468 EXPRESSION_PARSERS = { 469 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 470 exp.Column: lambda self: self._parse_column(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.DataType: lambda self: self._parse_types(), 473 exp.Expression: lambda self: self._parse_statement(), 474 exp.From: lambda self: self._parse_from(), 475 exp.Group: lambda self: self._parse_group(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.Identifier: lambda self: self._parse_id_var(), 478 exp.Join: lambda self: self._parse_join(), 479 exp.Lambda: lambda self: self._parse_lambda(), 480 
exp.Lateral: lambda self: self._parse_lateral(), 481 exp.Limit: lambda self: self._parse_limit(), 482 exp.Offset: lambda self: self._parse_offset(), 483 exp.Order: lambda self: self._parse_order(), 484 exp.Ordered: lambda self: self._parse_ordered(), 485 exp.Properties: lambda self: self._parse_properties(), 486 exp.Qualify: lambda self: self._parse_qualify(), 487 exp.Returning: lambda self: self._parse_returning(), 488 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 489 exp.Table: lambda self: self._parse_table_parts(), 490 exp.TableAlias: lambda self: self._parse_table_alias(), 491 exp.Where: lambda self: self._parse_where(), 492 exp.Window: lambda self: self._parse_named_window(), 493 exp.With: lambda self: self._parse_with(), 494 "JOIN_TYPE": lambda self: self._parse_join_parts(), 495 } 496 497 STATEMENT_PARSERS = { 498 TokenType.ALTER: lambda self: self._parse_alter(), 499 TokenType.BEGIN: lambda self: self._parse_transaction(), 500 TokenType.CACHE: lambda self: self._parse_cache(), 501 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 502 TokenType.COMMENT: lambda self: self._parse_comment(), 503 TokenType.CREATE: lambda self: self._parse_create(), 504 TokenType.DELETE: lambda self: self._parse_delete(), 505 TokenType.DESC: lambda self: self._parse_describe(), 506 TokenType.DESCRIBE: lambda self: self._parse_describe(), 507 TokenType.DROP: lambda self: self._parse_drop(), 508 TokenType.END: lambda self: self._parse_commit_or_rollback(), 509 TokenType.FROM: lambda self: exp.select("*").from_( 510 t.cast(exp.From, self._parse_from(skip_from_token=True)) 511 ), 512 TokenType.INSERT: lambda self: self._parse_insert(), 513 TokenType.LOAD: lambda self: self._parse_load(), 514 TokenType.MERGE: lambda self: self._parse_merge(), 515 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 516 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 517 TokenType.ROLLBACK: lambda self: 
self._parse_commit_or_rollback(), 518 TokenType.SET: lambda self: self._parse_set(), 519 TokenType.UNCACHE: lambda self: self._parse_uncache(), 520 TokenType.UPDATE: lambda self: self._parse_update(), 521 TokenType.USE: lambda self: self.expression( 522 exp.Use, 523 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 524 and exp.var(self._prev.text), 525 this=self._parse_table(schema=False), 526 ), 527 } 528 529 UNARY_PARSERS = { 530 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 531 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 532 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 533 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 534 } 535 536 PRIMARY_PARSERS = { 537 TokenType.STRING: lambda self, token: self.expression( 538 exp.Literal, this=token.text, is_string=True 539 ), 540 TokenType.NUMBER: lambda self, token: self.expression( 541 exp.Literal, this=token.text, is_string=False 542 ), 543 TokenType.STAR: lambda self, _: self.expression( 544 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 545 ), 546 TokenType.NULL: lambda self, _: self.expression(exp.Null), 547 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 548 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 549 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 550 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 551 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 552 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 553 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 554 exp.National, this=token.text 555 ), 556 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 557 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 558 } 559 560 PLACEHOLDER_PARSERS = { 561 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 562 TokenType.PARAMETER: lambda self: self._parse_parameter(), 563 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 564 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 565 else None, 566 } 567 568 RANGE_PARSERS = { 569 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 570 TokenType.GLOB: binary_range_parser(exp.Glob), 571 TokenType.ILIKE: binary_range_parser(exp.ILike), 572 TokenType.IN: lambda self, this: self._parse_in(this), 573 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 574 TokenType.IS: lambda self, this: self._parse_is(this), 575 TokenType.LIKE: binary_range_parser(exp.Like), 576 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 577 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 578 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 579 } 580 581 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 582 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 583 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 584 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 585 "CHARACTER SET": lambda self: self._parse_character_set(), 586 "CHECKSUM": lambda self: self._parse_checksum(), 587 "CLUSTER BY": lambda self: self._parse_cluster(), 588 "CLUSTERED": lambda self: self._parse_clustered_by(), 589 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 590 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 591 "COPY": lambda self: self._parse_copy_property(), 592 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 593 "DEFINER": lambda self: self._parse_definer(), 594 "DETERMINISTIC": lambda self: self.expression( 595 exp.StabilityProperty, 
this=exp.Literal.string("IMMUTABLE") 596 ), 597 "DISTKEY": lambda self: self._parse_distkey(), 598 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 599 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 600 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 601 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 602 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 603 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 604 "FREESPACE": lambda self: self._parse_freespace(), 605 "IMMUTABLE": lambda self: self.expression( 606 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 607 ), 608 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 609 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 610 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 611 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 612 "LIKE": lambda self: self._parse_create_like(), 613 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 614 "LOCK": lambda self: self._parse_locking(), 615 "LOCKING": lambda self: self._parse_locking(), 616 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 617 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 618 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 619 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 620 "NO": lambda self: self._parse_no_property(), 621 "ON": lambda self: self._parse_on_property(), 622 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 623 "PARTITION BY": lambda self: self._parse_partitioned_by(), 624 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 625 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 626 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 627 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 628 "RETURNS": lambda self: self._parse_returns(), 629 "ROW": lambda self: self._parse_row(), 630 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 631 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 632 "SETTINGS": lambda self: self.expression( 633 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 634 ), 635 "SORTKEY": lambda self: self._parse_sortkey(), 636 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 637 "STABLE": lambda self: self.expression( 638 exp.StabilityProperty, this=exp.Literal.string("STABLE") 639 ), 640 "STORED": lambda self: self._parse_stored(), 641 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 642 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 643 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 644 "TO": lambda self: self._parse_to_table(), 645 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 646 "TTL": lambda self: self._parse_ttl(), 647 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 648 "VOLATILE": lambda self: self._parse_volatile_property(), 649 "WITH": lambda self: self._parse_with_property(), 650 } 651 652 CONSTRAINT_PARSERS = { 653 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 654 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 655 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 656 "CHARACTER SET": lambda self: self.expression( 657 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 658 ), 659 "CHECK": lambda self: self.expression( 660 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 661 ), 662 "COLLATE": lambda self: self.expression( 663 exp.CollateColumnConstraint, this=self._parse_var() 664 ), 665 "COMMENT": lambda self: 
self.expression( 666 exp.CommentColumnConstraint, this=self._parse_string() 667 ), 668 "COMPRESS": lambda self: self._parse_compress(), 669 "DEFAULT": lambda self: self.expression( 670 exp.DefaultColumnConstraint, this=self._parse_bitwise() 671 ), 672 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 673 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 674 "FORMAT": lambda self: self.expression( 675 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 676 ), 677 "GENERATED": lambda self: self._parse_generated_as_identity(), 678 "IDENTITY": lambda self: self._parse_auto_increment(), 679 "INLINE": lambda self: self._parse_inline(), 680 "LIKE": lambda self: self._parse_create_like(), 681 "NOT": lambda self: self._parse_not_constraint(), 682 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 683 "ON": lambda self: self._match(TokenType.UPDATE) 684 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 685 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 686 "PRIMARY KEY": lambda self: self._parse_primary_key(), 687 "REFERENCES": lambda self: self._parse_references(match=False), 688 "TITLE": lambda self: self.expression( 689 exp.TitleColumnConstraint, this=self._parse_var_or_string() 690 ), 691 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 692 "UNIQUE": lambda self: self._parse_unique(), 693 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 694 } 695 696 ALTER_PARSERS = { 697 "ADD": lambda self: self._parse_alter_table_add(), 698 "ALTER": lambda self: self._parse_alter_table_alter(), 699 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 700 "DROP": lambda self: self._parse_alter_table_drop(), 701 "RENAME": lambda self: self._parse_alter_table_rename(), 702 } 703 704 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN 
KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 705 706 NO_PAREN_FUNCTION_PARSERS = { 707 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 708 TokenType.CASE: lambda self: self._parse_case(), 709 TokenType.IF: lambda self: self._parse_if(), 710 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 711 exp.NextValueFor, 712 this=self._parse_column(), 713 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 714 ), 715 } 716 717 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 718 719 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 720 "ANY_VALUE": lambda self: self._parse_any_value(), 721 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 722 "CONCAT": lambda self: self._parse_concat(), 723 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 724 "DECODE": lambda self: self._parse_decode(), 725 "EXTRACT": lambda self: self._parse_extract(), 726 "JSON_OBJECT": lambda self: self._parse_json_object(), 727 "LOG": lambda self: self._parse_logarithm(), 728 "MATCH": lambda self: self._parse_match_against(), 729 "OPENJSON": lambda self: self._parse_open_json(), 730 "POSITION": lambda self: self._parse_position(), 731 "SAFE_CAST": lambda self: self._parse_cast(False), 732 "STRING_AGG": lambda self: self._parse_string_agg(), 733 "SUBSTRING": lambda self: self._parse_substring(), 734 "TRIM": lambda self: self._parse_trim(), 735 "TRY_CAST": lambda self: self._parse_cast(False), 736 "TRY_CONVERT": lambda self: self._parse_convert(False), 737 } 738 739 QUERY_MODIFIER_PARSERS = { 740 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 741 TokenType.WHERE: lambda self: ("where", self._parse_where()), 742 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 743 TokenType.HAVING: lambda self: ("having", self._parse_having()), 744 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 745 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 746 
TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 747 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 748 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 749 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 750 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 751 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 752 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 753 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.CLUSTER_BY: lambda self: ( 755 "cluster", 756 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 757 ), 758 TokenType.DISTRIBUTE_BY: lambda self: ( 759 "distribute", 760 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 761 ), 762 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 763 } 764 765 SET_PARSERS = { 766 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 767 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 768 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 769 "TRANSACTION": lambda self: self._parse_set_transaction(), 770 } 771 772 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 773 774 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 775 776 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 777 778 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 779 780 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 781 782 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 783 TRANSACTION_CHARACTERISTICS = { 784 "ISOLATION LEVEL REPEATABLE READ", 785 "ISOLATION LEVEL READ COMMITTED", 786 "ISOLATION LEVEL READ UNCOMMITTED", 787 "ISOLATION LEVEL SERIALIZABLE", 788 "READ WRITE", 789 "READ ONLY", 790 } 791 792 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 793 794 CLONE_KINDS = 
{"TIMESTAMP", "OFFSET", "STATEMENT"} 795 796 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 797 798 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 799 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 800 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 801 802 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 803 804 STRICT_CAST = True 805 806 # A NULL arg in CONCAT yields NULL by default 807 CONCAT_NULL_OUTPUTS_STRING = False 808 809 PREFIXED_PIVOT_COLUMNS = False 810 IDENTIFY_PIVOT_STRINGS = False 811 812 LOG_BASE_FIRST = True 813 LOG_DEFAULTS_TO_LN = False 814 815 __slots__ = ( 816 "error_level", 817 "error_message_context", 818 "max_errors", 819 "sql", 820 "errors", 821 "_tokens", 822 "_index", 823 "_curr", 824 "_next", 825 "_prev", 826 "_prev_comments", 827 ) 828 829 # Autofilled 830 INDEX_OFFSET: int = 0 831 UNNEST_COLUMN_ONLY: bool = False 832 ALIAS_POST_TABLESAMPLE: bool = False 833 STRICT_STRING_CONCAT = False 834 NULL_ORDERING: str = "nulls_are_small" 835 SHOW_TRIE: t.Dict = {} 836 SET_TRIE: t.Dict = {} 837 FORMAT_MAPPING: t.Dict[str, str] = {} 838 FORMAT_TRIE: t.Dict = {} 839 TIME_MAPPING: t.Dict[str, str] = {} 840 TIME_TRIE: t.Dict = {} 841 842 def __init__( 843 self, 844 error_level: t.Optional[ErrorLevel] = None, 845 error_message_context: int = 100, 846 max_errors: int = 3, 847 ): 848 self.error_level = error_level or ErrorLevel.IMMEDIATE 849 self.error_message_context = error_message_context 850 self.max_errors = max_errors 851 self.reset() 852 853 def reset(self): 854 self.sql = "" 855 self.errors = [] 856 self._tokens = [] 857 self._index = 0 858 self._curr = None 859 self._next = None 860 self._prev = None 861 self._prev_comments = None 862 863 def parse( 864 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 865 ) -> t.List[t.Optional[exp.Expression]]: 866 """ 867 Parses a list of tokens and returns a list of syntax trees, one tree 868 per parsed SQL statement. 
869 870 Args: 871 raw_tokens: The list of tokens. 872 sql: The original SQL string, used to produce helpful debug messages. 873 874 Returns: 875 The list of the produced syntax trees. 876 """ 877 return self._parse( 878 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 879 ) 880 881 def parse_into( 882 self, 883 expression_types: exp.IntoType, 884 raw_tokens: t.List[Token], 885 sql: t.Optional[str] = None, 886 ) -> t.List[t.Optional[exp.Expression]]: 887 """ 888 Parses a list of tokens into a given Expression type. If a collection of Expression 889 types is given instead, this method will try to parse the token list into each one 890 of them, stopping at the first for which the parsing succeeds. 891 892 Args: 893 expression_types: The expression type(s) to try and parse the token list into. 894 raw_tokens: The list of tokens. 895 sql: The original SQL string, used to produce helpful debug messages. 896 897 Returns: 898 The target Expression. 899 """ 900 errors = [] 901 for expression_type in ensure_list(expression_types): 902 parser = self.EXPRESSION_PARSERS.get(expression_type) 903 if not parser: 904 raise TypeError(f"No parser registered for {expression_type}") 905 906 try: 907 return self._parse(parser, raw_tokens, sql) 908 except ParseError as e: 909 e.errors[0]["into_expression"] = expression_type 910 errors.append(e) 911 912 raise ParseError( 913 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 914 errors=merge_errors(errors), 915 ) from errors[-1] 916 917 def _parse( 918 self, 919 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 920 raw_tokens: t.List[Token], 921 sql: t.Optional[str] = None, 922 ) -> t.List[t.Optional[exp.Expression]]: 923 self.reset() 924 self.sql = sql or "" 925 926 total = len(raw_tokens) 927 chunks: t.List[t.List[Token]] = [[]] 928 929 for i, token in enumerate(raw_tokens): 930 if token.token_type == TokenType.SEMICOLON: 931 if i < total - 1: 932 chunks.append([]) 933 else: 934 
            # NOTE(review): the statements down to `return expressions` are the tail of a
            # definition that starts before this chunk. They are reproduced unchanged; their
            # nesting was reconstructed from collapsed text -- confirm against the full file.
            chunks[-1].append(token)

    expressions = []

    for tokens in chunks:
        self._index = -1
        self._tokens = tokens
        self._advance()

        expressions.append(parse_method(self))

        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def check_errors(self) -> None:
    """
    Logs or raises any found errors, depending on the chosen error level setting.

    With ErrorLevel.WARN every recorded error is logged through the module logger. With
    ErrorLevel.RAISE, and at least one recorded error, a single ParseError built from the
    recorded errors is raised. Any other error level does nothing here.
    """
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.

    Args:
        message: The description of the error.
        token: The token the error refers to. When omitted, the current token is used, then
            the previous token, then an empty string token.

    Raises:
        ParseError: Immediately, if the error level is ErrorLevel.IMMEDIATE.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    # The highlight slice below is [start:end], so `token.end` is treated as inclusive.
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Explicit comments win; otherwise fall back to the comments of the previous token.
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)

def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
    """Attaches the pending previous-token comments to `expression` and clears them."""
    if expression and self._prev_comments:
        expression.add_comments(self._prev_comments)
        self._prev_comments = None

def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Validation is skipped entirely when the error level is ErrorLevel.IGNORE. Each message
    reported by the expression is passed to `raise_error`.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression

def _find_sql(self, start: Token, end: Token) -> str:
    """Returns the slice of `self.sql` from `start.start` through `end.end` (inclusive)."""
    return self.sql[start.start : end.end + 1]

def _advance(self, times: int = 1) -> None:
    """
    Moves the token cursor by `times` positions and refreshes the cursor state.

    Updates `_curr`, `_next`, `_prev` and `_prev_comments`. `_retreat` calls this with a
    negative or positive offset to reach an absolute position.
    """
    self._index += times
    # NOTE(review): presumably seq_get yields None when the index is out of range -- confirm.
    self._curr = seq_get(self._tokens, self._index)
    self._next = seq_get(self._tokens, self._index + 1)

    if self._index > 0:
        self._prev = self._tokens[self._index - 1]
        self._prev_comments = self._prev.comments
    else:
        self._prev = None
        self._prev_comments = None

def _retreat(self, index: int) -> None:
    """Moves the token cursor to the absolute position `index`; no-op if already there."""
    if index != self._index:
        self._advance(index - self._index)

def _parse_command(self) -> exp.Command:
    """Builds an exp.Command from the previous token's text and the result of `_parse_string`."""
    return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
    """
    Parses a comment statement into an exp.Comment.

    If no token from CREATABLES follows the optional ON, the statement is handed to
    `_parse_as_command`, starting from the token that preceded this call.

    Args:
        allow_exists: Whether to try parsing an IF EXISTS clause first.
    """
    start = self._prev
    exists = self._parse_exists() if allow_exists else None

    self._match(TokenType.ON)

    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        return self._parse_as_command(start)

    # The target is parsed differently depending on the kind of object being commented on.
    if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=kind.token_type)
    elif kind.token_type == TokenType.TABLE:
        this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif kind.token_type == TokenType.COLUMN:
        this = self._parse_column()
    else:
        this = self._parse_id_var()

    self._match(TokenType.IS)

    return self.expression(
        exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
    )

def _parse_to_table(
    self,
) -> exp.ToTableProperty:
    """Parses table parts (with schema=True) and wraps them in an exp.ToTableProperty."""
    table = self._parse_table_parts(schema=True)
    return self.expression(exp.ToTableProperty, this=table)

# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    """
    Parses a TTL clause into an exp.MergeTreeTTL.

    The clause is a comma-separated list of TTL actions, followed by optional WHERE and
    GROUP BY clauses. A SET list of aggregates is only parsed when a GROUP BY was found.
    """

    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        """Parses one TTL expression and its optional DELETE / RECOMPRESS / TO DISK / TO VOLUME."""
        this = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
        if self._match_text_seq("RECOMPRESS"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
            )
        if self._match_text_seq("TO", "DISK"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
            )
        if self._match_text_seq("TO", "VOLUME"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
            )

        # No action keyword followed: the bare expression is returned as is.
        return this

    expressions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    aggregates = None
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL,
        expressions=expressions,
        where=where,
        group=group,
        aggregates=aggregates,
    )

def _parse_statement(self) -> t.Optional[exp.Expression]:
    """
    Parses a single statement starting at the current token.

    Returns None when there is no current token. A token found in STATEMENT_PARSERS is
    dispatched to its parser, and a token found in Tokenizer.COMMANDS goes to
    `_parse_command`. Otherwise an expression (followed by set operations) or, failing
    that, a select is parsed, and query modifiers are applied to the result.
    """
    if self._curr is None:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        return self.STATEMENT_PARSERS[self._prev.token_type](self)

    if self._match_set(Tokenizer.COMMANDS):
        return self._parse_command()

    expression = self._parse_expression()
    expression = self._parse_set_operations(expression) if expression else self._parse_select()
    return self._parse_query_modifiers(expression)

def _parse_drop(self) -> exp.Drop | exp.Command:
    """
    Parses a DROP statement into an exp.Drop.

    Falls back to `_parse_as_command` when no token from CREATABLES follows the optional
    TEMPORARY / MATERIALIZED keywords.
    """
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    kind = self._match_set(self.CREATABLES) and self._prev.text
    if not kind:
        return self._parse_as_command(start)

    # Keyword arguments are evaluated in order, and each call below consumes tokens.
    return self.expression(
        exp.Drop,
        exists=self._parse_exists(),
        this=self._parse_table(schema=True),
        kind=kind,
        temporary=temporary,
        materialized=materialized,
        cascade=self._match_text_seq("CASCADE"),
        constraints=self._match_text_seq("CONSTRAINTS"),
        purge=self._match_text_seq("PURGE"),
    )

def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
    """
    Matches IF EXISTS, or IF NOT EXISTS when `not_` is True.

    Returns:
        The result of the chained matches; falsy as soon as one of them fails.
    """
    return (
        self._match(TokenType.IF)
        and (not not_ or self._match(TokenType.NOT))
        and self._match(TokenType.EXISTS)
    )

def _parse_create(self) -> exp.Create | exp.Command:
    """
    Parses a CREATE (or REPLACE) statement into an exp.Create.

    Falls back to `_parse_as_command` when no token from CREATABLES is found, either
    directly or after a leading run of properties. Properties parsed at the different
    positions of the statement are merged into a single exp.Properties node.
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev
    replace = start.text.upper() == "REPLACE" or self._match_pair(
        TokenType.OR, TokenType.REPLACE
    )
    unique = self._match(TokenType.UNIQUE)

    # For TABLE FUNCTION, skip TABLE so that FUNCTION is matched as the creatable below.
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    exists = self._parse_exists(not_=True)
    this = None
    expression = None
    indexes = None
    no_schema_binding = None
    begin = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        """Merges `temp_props` into the enclosing `properties`, or adopts it if none exist yet."""
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        begin = self._match(TokenType.BEGIN)
        return_ = self._match_text_seq("RETURN")
        expression = self._parse_statement()

        if return_:
            expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        this = self._parse_index(index=self._parse_id_var())
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(schema=True)

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        expression = self._parse_ddl_select()

        if create_token.token_type == TokenType.TABLE:
            indexes = []
            # Collect indexes until `_parse_index` finds none; properties may follow each one.
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                extend_props(self._parse_properties())

                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

    return self.expression(
        exp.Create,
        this=this,
        kind=create_token.text,
        replace=replace,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        clone=clone,
    )

def _parse_property_before(self) -> t.Optional[exp.Expression]:
    """
    Parses one property that may be preceded by modifier keywords.

    The matched modifiers are passed as keyword arguments to the parser registered in
    PROPERTY_PARSERS. A TypeError from that call is reported through `raise_error`.

    Returns:
        The parsed property, or None if no registered property keyword follows.
    """
    # only used for teradata currently
    self._match(TokenType.COMMA)

    # Dict entries are evaluated in order, so modifiers are only recognized in this sequence.
    kwargs = {
        "no": self._match_text_seq("NO"),
        "dual": self._match_text_seq("DUAL"),
        "before": self._match_text_seq("BEFORE"),
        "default": self._match_text_seq("DEFAULT"),
        "local": (self._match_text_seq("LOCAL") and "LOCAL")
        or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
        "after": self._match_text_seq("AFTER"),
        "minimum": self._match_texts(("MIN", "MINIMUM")),
        "maximum": self._match_texts(("MAX", "MAXIMUM")),
    }

    if self._match_texts(self.PROPERTY_PARSERS):
        parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
        try:
            # Only the modifiers that actually matched are forwarded.
            return parser(self, **{k: v for k, v in kwargs.items() if v})
        except TypeError:
            self.raise_error(f"Cannot parse property '{self._prev.text}'")

    return None

def _parse_property(self) -> t.Optional[exp.Expression]:
    """
    Parses a single property.

    Tries, in order: a parser registered in PROPERTY_PARSERS, DEFAULT CHARACTER SET,
    COMPOUND SORTKEY, SQL SECURITY, and finally a generic `key = value` assignment whose
    key is a VAR or STRING token.

    Returns:
        The parsed property, or None if nothing matched.
    """
    if self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

    if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
        return self._parse_character_set(default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("SQL", "SECURITY"):
        return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

    # Look ahead only (advance=False); the key itself is consumed below.
    assignment = self._match_pair(
        TokenType.VAR, TokenType.EQ, advance=False
    ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

    if assignment:
        key = self._parse_var_or_string()
        self._match(TokenType.EQ)
        return self.expression(exp.Property, this=key, value=self._parse_column())

    return None

def _parse_stored(self) -> exp.FileFormatProperty:
    """
    Parses a file format into an exp.FileFormatProperty.

    If INPUTFORMAT and/or OUTPUTFORMAT strings are present they are wrapped in an
    exp.InputOutputFormat; otherwise a var/string, number or identifier is used.
    """
    self._match(TokenType.ALIAS)

    input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
    output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

    return self.expression(
        exp.FileFormatProperty,
        this=self.expression(
            exp.InputOutputFormat, input_format=input_format, output_format=output_format
        )
        if input_format or output_format
        else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
    )

def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
    """Skips an optional `=` and an optional ALIAS token, then wraps a field in `exp_class`."""
    self._match(TokenType.EQ)
    self._match(TokenType.ALIAS)
    return self.expression(exp_class, this=self._parse_field())

def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
    """
    Parses consecutive properties until one fails to parse.

    Args:
        before: If truthy, each property is parsed with `_parse_property_before` instead
            of `_parse_property`.

    Returns:
        An exp.Properties node holding the parsed properties, or None if there were none.
    """
    properties = []
    while True:
        if before:
            prop = self._parse_property_before()
        else:
            prop = self._parse_property()

        if not prop:
            break
        # A property parser may return a list (see `_parse_with_property`'s return type).
        for p in ensure_list(prop):
            properties.append(p)

    if properties:
        return self.expression(exp.Properties, expressions=properties)

    return None

def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
    """Builds an exp.FallbackProperty, recording whether PROTECTION follows."""
    return self.expression(
        exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
    )

def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
    """
    Decides which property a VOLATILE keyword stands for.

    Looks at the token two positions before the cursor. If its type is in
    PRE_VOLATILE_TOKENS an exp.VolatileProperty is returned, otherwise an
    exp.StabilityProperty with the value "VOLATILE".
    """
    if self._index >= 2:
        pre_volatile_token = self._tokens[self._index - 2]
    else:
        pre_volatile_token = None

    if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
        return exp.VolatileProperty()

    return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

def _parse_with_property(
    self,
) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
    """
    Parses what follows a WITH in a property position.

    Handles, in order: a parenthesized comma-separated property list, JOURNAL, DATA,
    NO DATA, and finally falls through to `_parse_withisolatedloading`. Returns None
    before that last step if there is no next token.
    """
    self._match(TokenType.WITH)
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_property)

    if self._match_text_seq("JOURNAL"):
        return self._parse_withjournaltable()

    if self._match_text_seq("DATA"):
        return self._parse_withdata(no=False)
    elif self._match_text_seq("NO", "DATA"):
        return self._parse_withdata(no=True)

    if not self._next:
        return None

    return self._parse_withisolatedloading()

# https://dev.mysql.com/doc/refman/8.0/en/create-view.html
def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
    """
    Parses a `user@host` definer into an exp.DefinerProperty.

    The host may be an identifier or a MOD token, in which case that token's text is used.

    Returns:
        The property, or None if either the user or the host is missing.
    """
    self._match(TokenType.EQ)

    user = self._parse_id_var()
    self._match(TokenType.PARAMETER)
    host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

    if not user or not host:
        return None

    return exp.DefinerProperty(this=f"{user}@{host}")

def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
    """Skips optional TABLE and `=` tokens, then wraps table parts in the property."""
    self._match(TokenType.TABLE)
    self._match(TokenType.EQ)
    return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

def _parse_log(self, no: bool = False) -> exp.LogProperty:
    """Builds an exp.LogProperty carrying the `no` flag; consumes no tokens."""
    return self.expression(exp.LogProperty, no=no)

def _parse_journal(self, **kwargs) -> exp.JournalProperty:
    """Builds an exp.JournalProperty from the given keyword arguments; consumes no tokens."""
    return self.expression(exp.JournalProperty, **kwargs)

def _parse_checksum(self) -> exp.ChecksumProperty:
    """
    Parses a checksum setting into an exp.ChecksumProperty.

    `on` is True for ON, False for OFF and None otherwise; `default` records whether a
    DEFAULT token follows.
    """
    self._match(TokenType.EQ)

    on = None
    if self._match(TokenType.ON):
        on = True
    elif self._match_text_seq("OFF"):
        on = False

    return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

def _parse_cluster(self) -> exp.Cluster:
    """Parses a comma-separated list of ordered expressions into an exp.Cluster."""
    return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

def _parse_clustered_by(self) -> exp.ClusteredByProperty:
    """
    Parses `BY (columns) [SORTED BY (ordered, ...)] INTO <number> BUCKETS`.

    Returns:
        An exp.ClusteredByProperty; `sorted_by` is None when SORTED BY is absent.
    """
    self._match_text_seq("BY")

    self._match_l_paren()
    expressions = self._parse_csv(self._parse_column)
    self._match_r_paren()

    if self._match_text_seq("SORTED", "BY"):
        self._match_l_paren()
        sorted_by = self._parse_csv(self._parse_ordered)
        self._match_r_paren()
    else:
        sorted_by = None

    self._match(TokenType.INTO)
    buckets = self._parse_number()
    self._match_text_seq("BUCKETS")

    return self.expression(
        exp.ClusteredByProperty,
        expressions=expressions,
        sorted_by=sorted_by,
        buckets=buckets,
    )

def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
    """
    Parses the GRANTS keyword into an exp.CopyGrantsProperty.

    If GRANTS does not follow, the cursor is moved back by one token and None is returned.
    """
    if not self._match_text_seq("GRANTS"):
        # NOTE(review): presumably un-consumes the COPY keyword matched by the caller -- confirm.
        self._retreat(self._index - 1)
        return None

    return self.expression(exp.CopyGrantsProperty)

def _parse_freespace(self) -> exp.FreespaceProperty:
    """Parses `[=] <number> [PERCENT]` into an exp.FreespaceProperty."""
    self._match(TokenType.EQ)
    return self.expression(
        exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
    )

def _parse_mergeblockratio(
    self, no: bool = False, default: bool = False
) -> exp.MergeBlockRatioProperty:
    """
    Parses a merge block ratio into an exp.MergeBlockRatioProperty.

    With a leading `=`, a number and optional PERCENT are parsed and the `no` / `default`
    flags are not used. Without it, only the flags are recorded.
    """
    if self._match(TokenType.EQ):
        return self.expression(
            exp.MergeBlockRatioProperty,
            this=self._parse_number(),
            percent=self._match(TokenType.PERCENT),
        )

    return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

def _parse_datablocksize(
    self,
    default: t.Optional[bool] = None,
    minimum: t.Optional[bool] = None,
    maximum: t.Optional[bool] = None,
) -> exp.DataBlocksizeProperty:
    """
    Parses `[=] <number> [BYTES | KBYTES | KILOBYTES]` into an exp.DataBlocksizeProperty.

    The `default`, `minimum` and `maximum` flags are passed through unchanged.
    """
    self._match(TokenType.EQ)
    size = self._parse_number()

    units = None
    if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
        units = self._prev.text

    return self.expression(
        exp.DataBlocksizeProperty,
        size=size,
        units=units,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )

def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
    """
    Parses a block compression setting into an exp.BlockCompressionProperty.

    ALWAYS, MANUAL, NEVER and DEFAULT are each tried once, in that order. AUTOTEMP, if
    present, is followed by a schema.
    """
    self._match(TokenType.EQ)
    always = self._match_text_seq("ALWAYS")
    manual = self._match_text_seq("MANUAL")
    never = self._match_text_seq("NEVER")
    default = self._match_text_seq("DEFAULT")

    autotemp = None
    if self._match_text_seq("AUTOTEMP"):
        autotemp = self._parse_schema()

    return self.expression(
        exp.BlockCompressionProperty,
        always=always,
        manual=manual,
        never=never,
        default=default,
        autotemp=autotemp,
    )

def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
    """
    Parses `[NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]`.

    Every part is optional here: each keyword is tried once, in order, and the match
    results are stored on the exp.IsolatedLoadingProperty.
    """
    no = self._match_text_seq("NO")
    concurrent = self._match_text_seq("CONCURRENT")
    self._match_text_seq("ISOLATED", "LOADING")
    for_all = self._match_text_seq("FOR", "ALL")
    for_insert = self._match_text_seq("FOR", "INSERT")
    for_none = self._match_text_seq("FOR", "NONE")
    return self.expression(
        exp.IsolatedLoadingProperty,
        no=no,
        concurrent=concurrent,
        for_all=for_all,
        for_insert=for_insert,
        for_none=for_none,
    )

def _parse_locking(self) -> exp.LockingProperty:
    """
    Parses a locking clause into an exp.LockingProperty.

    Reads, in order: the object kind (TABLE, VIEW, ROW or DATABASE), the object name
    (not for ROW), FOR or IN, the lock type, and an optional OVERRIDE. Parts that are not
    present are stored as None.
    """
    if self._match(TokenType.TABLE):
        kind = "TABLE"
    elif self._match(TokenType.VIEW):
        kind = "VIEW"
    elif self._match(TokenType.ROW):
        kind = "ROW"
    elif self._match_text_seq("DATABASE"):
        kind = "DATABASE"
    else:
        kind = None

    if kind in ("DATABASE", "TABLE", "VIEW"):
        this = self._parse_table_parts()
    else:
        this = None

    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"
    else:
        for_or_in = None

    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are normalized to EXCLUSIVE.
        lock_type = "EXCLUSIVE"
    elif self._match_text_seq("SHARE"):
        lock_type = "SHARE"
    elif self._match_text_seq("READ"):
        lock_type = "READ"
    elif self._match_text_seq("WRITE"):
        lock_type = "WRITE"
    elif self._match_text_seq("CHECKSUM"):
        lock_type = "CHECKSUM"
    else:
        lock_type = None

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )

def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
    """Parses the expressions after a PARTITION_BY token; returns [] if the token is absent."""
    if self._match(TokenType.PARTITION_BY):
        return self._parse_csv(self._parse_conjunction)
    return []

def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
    """Parses `[=]` followed by a schema, or else a (possibly bracketed) field."""
    self._match(TokenType.EQ)
    return self.expression(
        exp.PartitionedByProperty,
        this=self._parse_schema() or self._parse_bracket(self._parse_field()),
    )

def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """
    Parses the optional statistics suffix of a WITH [NO] DATA clause.

    `statistics` is True for AND STATISTICS, False for AND NO STATISTICS, None otherwise.
    """
    if self._match_text_seq("AND", "STATISTICS"):
        statistics = True
    elif self._match_text_seq("AND", "NO", "STATISTICS"):
        statistics = False
    else:
        statistics = None

    return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
    """Returns an exp.NoPrimaryIndexProperty if PRIMARY INDEX follows, else None."""
    if self._match_text_seq("PRIMARY", "INDEX"):
        return exp.NoPrimaryIndexProperty()
    return None

def _parse_on_property(self) -> t.Optional[exp.Expression]:
    """
    Parses COMMIT PRESERVE ROWS or COMMIT DELETE ROWS into an exp.OnCommitProperty.

    Returns:
        The property (with delete=True for the DELETE form), or None if neither matches.
    """
    if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
        return exp.OnCommitProperty()
    elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
        return exp.OnCommitProperty(delete=True)
    return None

def _parse_distkey(self) -> exp.DistKeyProperty:
    """Parses a wrapped identifier into an exp.DistKeyProperty."""
    return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
    """
    Parses a table followed by any number of INCLUDING / EXCLUDING options.

    Each option becomes an exp.Property whose key is the upper-cased keyword and whose
    value is the upper-cased identifier that follows it.

    Returns:
        The exp.LikeProperty, or None if an option keyword has no identifier after it.
    """
    table = self._parse_table(schema=True)

    options = []
    while self._match_texts(("INCLUDING", "EXCLUDING")):
        this = self._prev.text.upper()

        id_var = self._parse_id_var()
        if not id_var:
            return None

        options.append(
            self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
        )

    return self.expression(exp.LikeProperty, this=table, expressions=options)

def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
    """Parses wrapped identifiers into an exp.SortKeyProperty, recording `compound`."""
    return self.expression(
        exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
    )

def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
    """Parses `[=] <var or string>` into an exp.CharacterSetProperty, recording `default`."""
    self._match(TokenType.EQ)
    return self.expression(
        exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
    )

def _parse_returns(self) -> exp.ReturnsProperty:
    """
    Parses a return type into an exp.ReturnsProperty.

    After TABLE, either a `<...>` list of struct types (a missing `>` is reported through
    `raise_error`) or a schema is parsed. Without TABLE a plain type is parsed.
    """
    value: t.Optional[exp.Expression]
    is_table = self._match(TokenType.TABLE)

    if is_table:
        if self._match(TokenType.LT):
            value = self.expression(
                exp.Schema,
                this="TABLE",
                expressions=self._parse_csv(self._parse_struct_types),
            )
            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")
        else:
            value = self._parse_schema(exp.var("TABLE"))
    else:
        value = self._parse_types()

    return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

def _parse_describe(self) -> exp.Describe:
    """Parses an optional CREATABLES keyword and a table into an exp.Describe."""
    kind = self._match_set(self.CREATABLES) and self._prev.text
    this = self._parse_table()
    return self.expression(exp.Describe, this=this, kind=kind)

def _parse_insert(self) -> exp.Insert:
    """
    Parses an INSERT statement into an exp.Insert.

    The target is either a DIRECTORY (wrapped in exp.Directory) or a table, optionally
    preceded by `OR <alternative>`, INTO and TABLE.
    """
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None

    if self._match_text_seq("DIRECTORY"):
        this: t.Optional[exp.Expression] = self.expression(
            exp.Directory,
            this=self._parse_var_or_string(),
            local=local,
            row_format=self._parse_row_format(match_row=True),
        )
    else:
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        self._match(TokenType.TABLE)
        this = self._parse_table(schema=True)

    # Keyword arguments are evaluated in order, and each call below consumes tokens.
    return self.expression(
        exp.Insert,
        this=this,
        exists=self._parse_exists(),
        partition=self._parse_partition(),
        where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
        and self._parse_conjunction(),
        expression=self._parse_ddl_select(),
        conflict=self._parse_on_conflict(),
        returning=self._parse_returning(),
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
    )

def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
    """
    Parses an ON CONFLICT or ON DUPLICATE KEY clause into an exp.OnConflict.

    For ON CONFLICT, either `ON CONSTRAINT <identifier>` or a comma-separated list of
    values is read as the conflict target. The action is then NOTHING, or a list of
    equality expressions after the optional DO / UPDATE / SET tokens.

    Returns:
        The exp.OnConflict, or None if neither clause is present.
    """
    conflict = self._match_text_seq("ON", "CONFLICT")
    duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

    if not conflict and not duplicate:
        return None

    nothing = None
    expressions = None
    key = None
    constraint = None

    if conflict:
        if self._match_text_seq("ON", "CONSTRAINT"):
            constraint = self._parse_id_var()
        else:
            key = self._parse_csv(self._parse_value)

    self._match_text_seq("DO")
    if self._match_text_seq("NOTHING"):
        nothing = True
    else:
        self._match(TokenType.UPDATE)
        self._match(TokenType.SET)
        expressions = self._parse_csv(self._parse_equality)

    return self.expression(
        exp.OnConflict,
        duplicate=duplicate,
        expressions=expressions,
        nothing=nothing,
        key=key,
        constraint=constraint,
    )

def _parse_returning(self) -> t.Optional[exp.Returning]:
    """Parses a RETURNING column list into an exp.Returning; None if RETURNING is absent."""
    if not self._match(TokenType.RETURNING):
        return None

    return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """Requires a FORMAT token, then delegates to `_parse_row_format`; None otherwise."""
    if not self._match(TokenType.FORMAT):
        return None
    return self._parse_row_format()

def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """
    Parses a row format specification.

    Args:
        match_row: If True, ROW FORMAT must be matched first; None is returned if it isn't.

    Returns:
        An exp.RowFormatSerdeProperty for SERDE. Otherwise an exp.RowFormatDelimitedProperty
        holding whichever delimiter sub-clauses were present, each tried once in a fixed
        order.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

    self._match_text_seq("DELIMITED")

    kwargs = {}

    if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
        kwargs["fields"] = self._parse_string()
        # ESCAPED BY is only looked for right after FIELDS TERMINATED BY.
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
    if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
        kwargs["collection_items"] = self._parse_string()
    if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
        kwargs["map_keys"] = self._parse_string()
    if self._match_text_seq("LINES", "TERMINATED", "BY"):
        kwargs["lines"] = self._parse_string()
    if self._match_text_seq("NULL", "DEFINED", "AS"):
        kwargs["null"] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

def _parse_load(self) -> exp.LoadData | exp.Command:
    """
    Parses a LOAD DATA statement into an exp.LoadData.

    Anything that does not start with DATA is handed to `_parse_as_command`.
    """
    if self._match_text_seq("DATA"):
        local = self._match_text_seq("LOCAL")
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
    return self._parse_as_command(self._prev)

def _parse_delete(self) -> exp.Delete:
    """
    Parses a DELETE statement into an exp.Delete.

    If the statement does not continue with FROM, a comma-separated list of tables is
    parsed first. FROM, USING, WHERE, RETURNING and LIMIT are then parsed in that order.
    """
    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    tables = None
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    return self.expression(
        exp.Delete,
        tables=tables,
        this=self._match(TokenType.FROM) and self._parse_table(joins=True),
        using=self._match(TokenType.USING) and self._parse_table(joins=True),
        where=self._parse_where(),
        returning=self._parse_returning(),
        limit=self._parse_limit(),
    )

def _parse_update(self) -> exp.Update: 1838 return self.expression( 1839 exp.Update, 1840 **{ # type: ignore 1841 "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS), 1842 "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality), 1843 "from": self._parse_from(joins=True), 1844 "where": self._parse_where(), 1845 "returning": self._parse_returning(), 1846 "limit": self._parse_limit(), 1847 }, 1848 ) 1849 1850 def _parse_uncache(self) -> exp.Uncache: 1851 if not self._match(TokenType.TABLE): 1852 self.raise_error("Expecting TABLE after UNCACHE") 1853 1854 return self.expression( 1855 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 1856 ) 1857 1858 def _parse_cache(self) -> exp.Cache: 1859 lazy = self._match_text_seq("LAZY") 1860 self._match(TokenType.TABLE) 1861 table = self._parse_table(schema=True) 1862 1863 options = [] 1864 if self._match_text_seq("OPTIONS"): 1865 self._match_l_paren() 1866 k = self._parse_string() 1867 self._match(TokenType.EQ) 1868 v = self._parse_string() 1869 options = [k, v] 1870 self._match_r_paren() 1871 1872 self._match(TokenType.ALIAS) 1873 return self.expression( 1874 exp.Cache, 1875 this=table, 1876 lazy=lazy, 1877 options=options, 1878 expression=self._parse_select(nested=True), 1879 ) 1880 1881 def _parse_partition(self) -> t.Optional[exp.Partition]: 1882 if not self._match(TokenType.PARTITION): 1883 return None 1884 1885 return self.expression( 1886 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 1887 ) 1888 1889 def _parse_value(self) -> exp.Tuple: 1890 if self._match(TokenType.L_PAREN): 1891 expressions = self._parse_csv(self._parse_conjunction) 1892 self._match_r_paren() 1893 return self.expression(exp.Tuple, expressions=expressions) 1894 1895 # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows. 
1896 # https://prestodb.io/docs/current/sql/values.html 1897 return self.expression(exp.Tuple, expressions=[self._parse_conjunction()]) 1898 1899 def _parse_select( 1900 self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True 1901 ) -> t.Optional[exp.Expression]: 1902 cte = self._parse_with() 1903 if cte: 1904 this = self._parse_statement() 1905 1906 if not this: 1907 self.raise_error("Failed to parse any statement following CTE") 1908 return cte 1909 1910 if "with" in this.arg_types: 1911 this.set("with", cte) 1912 else: 1913 self.raise_error(f"{this.key} does not support CTE") 1914 this = cte 1915 elif self._match(TokenType.SELECT): 1916 comments = self._prev_comments 1917 1918 hint = self._parse_hint() 1919 all_ = self._match(TokenType.ALL) 1920 distinct = self._match(TokenType.DISTINCT) 1921 1922 kind = ( 1923 self._match(TokenType.ALIAS) 1924 and self._match_texts(("STRUCT", "VALUE")) 1925 and self._prev.text 1926 ) 1927 1928 if distinct: 1929 distinct = self.expression( 1930 exp.Distinct, 1931 on=self._parse_value() if self._match(TokenType.ON) else None, 1932 ) 1933 1934 if all_ and distinct: 1935 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 1936 1937 limit = self._parse_limit(top=True) 1938 expressions = self._parse_expressions() 1939 1940 this = self.expression( 1941 exp.Select, 1942 kind=kind, 1943 hint=hint, 1944 distinct=distinct, 1945 expressions=expressions, 1946 limit=limit, 1947 ) 1948 this.comments = comments 1949 1950 into = self._parse_into() 1951 if into: 1952 this.set("into", into) 1953 1954 from_ = self._parse_from() 1955 if from_: 1956 this.set("from", from_) 1957 1958 this = self._parse_query_modifiers(this) 1959 elif (table or nested) and self._match(TokenType.L_PAREN): 1960 if self._match(TokenType.PIVOT): 1961 this = self._parse_simplified_pivot() 1962 elif self._match(TokenType.FROM): 1963 this = exp.select("*").from_( 1964 t.cast(exp.From, self._parse_from(skip_from_token=True)) 1965 ) 
1966 else: 1967 this = self._parse_table() if table else self._parse_select(nested=True) 1968 this = self._parse_set_operations(self._parse_query_modifiers(this)) 1969 1970 self._match_r_paren() 1971 1972 alias = None 1973 1974 # Ensure "wrapped" tables are not parsed as Subqueries. The exception to this is when there's 1975 # an alias that can be applied to the parentheses, because that would shadow all wrapped table 1976 # names, and so we want to parse it as a Subquery to represent the inner scope appropriately. 1977 # Additionally, we want the node under the Subquery to be an actual query, so we will replace 1978 # the table reference with a star query that selects from it. 1979 if isinstance(this, exp.Table): 1980 alias = self._parse_table_alias() 1981 if not alias: 1982 this.set("wrapped", True) 1983 return this 1984 1985 this.set("wrapped", None) 1986 joins = this.args.pop("joins", None) 1987 this = this.replace(exp.select("*").from_(this.copy(), copy=False)) 1988 this.set("joins", joins) 1989 1990 subquery = self._parse_subquery(this, parse_alias=parse_subquery_alias and not alias) 1991 if subquery and alias: 1992 subquery.set("alias", alias) 1993 1994 # We return early here so that the UNION isn't attached to the subquery by the 1995 # following call to _parse_set_operations, but instead becomes the parent node 1996 return subquery 1997 elif self._match(TokenType.VALUES): 1998 this = self.expression( 1999 exp.Values, 2000 expressions=self._parse_csv(self._parse_value), 2001 alias=self._parse_table_alias(), 2002 ) 2003 else: 2004 this = None 2005 2006 return self._parse_set_operations(this) 2007 2008 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2009 if not skip_with_token and not self._match(TokenType.WITH): 2010 return None 2011 2012 comments = self._prev_comments 2013 recursive = self._match(TokenType.RECURSIVE) 2014 2015 expressions = [] 2016 while True: 2017 expressions.append(self._parse_cte()) 2018 2019 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2020 break 2021 else: 2022 self._match(TokenType.WITH) 2023 2024 return self.expression( 2025 exp.With, comments=comments, expressions=expressions, recursive=recursive 2026 ) 2027 2028 def _parse_cte(self) -> exp.CTE: 2029 alias = self._parse_table_alias() 2030 if not alias or not alias.this: 2031 self.raise_error("Expected CTE to have alias") 2032 2033 self._match(TokenType.ALIAS) 2034 return self.expression( 2035 exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias 2036 ) 2037 2038 def _parse_table_alias( 2039 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2040 ) -> t.Optional[exp.TableAlias]: 2041 any_token = self._match(TokenType.ALIAS) 2042 alias = ( 2043 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2044 or self._parse_string_as_identifier() 2045 ) 2046 2047 index = self._index 2048 if self._match(TokenType.L_PAREN): 2049 columns = self._parse_csv(self._parse_function_parameter) 2050 self._match_r_paren() if columns else self._retreat(index) 2051 else: 2052 columns = None 2053 2054 if not alias and not columns: 2055 return None 2056 2057 return self.expression(exp.TableAlias, this=alias, columns=columns) 2058 2059 def _parse_subquery( 2060 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2061 ) -> t.Optional[exp.Subquery]: 2062 if not this: 2063 return None 2064 2065 return self.expression( 2066 exp.Subquery, 2067 this=this, 2068 pivots=self._parse_pivots(), 2069 alias=self._parse_table_alias() if parse_alias else None, 2070 ) 2071 2072 def _parse_query_modifiers( 2073 self, this: t.Optional[exp.Expression] 2074 ) -> t.Optional[exp.Expression]: 2075 if isinstance(this, self.MODIFIABLES): 2076 for join in iter(self._parse_join, None): 2077 this.append("joins", join) 2078 for lateral in iter(self._parse_lateral, None): 2079 this.append("laterals", lateral) 2080 2081 while True: 2082 if 
self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2083 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2084 key, expression = parser(self) 2085 2086 if expression: 2087 this.set(key, expression) 2088 if key == "limit": 2089 offset = expression.args.pop("offset", None) 2090 if offset: 2091 this.set("offset", exp.Offset(expression=offset)) 2092 continue 2093 break 2094 return this 2095 2096 def _parse_hint(self) -> t.Optional[exp.Hint]: 2097 if self._match(TokenType.HINT): 2098 hints = [] 2099 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2100 hints.extend(hint) 2101 2102 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2103 self.raise_error("Expected */ after HINT") 2104 2105 return self.expression(exp.Hint, expressions=hints) 2106 2107 return None 2108 2109 def _parse_into(self) -> t.Optional[exp.Into]: 2110 if not self._match(TokenType.INTO): 2111 return None 2112 2113 temp = self._match(TokenType.TEMPORARY) 2114 unlogged = self._match_text_seq("UNLOGGED") 2115 self._match(TokenType.TABLE) 2116 2117 return self.expression( 2118 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2119 ) 2120 2121 def _parse_from( 2122 self, joins: bool = False, skip_from_token: bool = False 2123 ) -> t.Optional[exp.From]: 2124 if not skip_from_token and not self._match(TokenType.FROM): 2125 return None 2126 2127 return self.expression( 2128 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2129 ) 2130 2131 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2132 if not self._match(TokenType.MATCH_RECOGNIZE): 2133 return None 2134 2135 self._match_l_paren() 2136 2137 partition = self._parse_partition_by() 2138 order = self._parse_order() 2139 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2140 2141 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2142 rows = exp.var("ONE ROW PER MATCH") 2143 elif 
self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2144 text = "ALL ROWS PER MATCH" 2145 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2146 text += f" SHOW EMPTY MATCHES" 2147 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2148 text += f" OMIT EMPTY MATCHES" 2149 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2150 text += f" WITH UNMATCHED ROWS" 2151 rows = exp.var(text) 2152 else: 2153 rows = None 2154 2155 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2156 text = "AFTER MATCH SKIP" 2157 if self._match_text_seq("PAST", "LAST", "ROW"): 2158 text += f" PAST LAST ROW" 2159 elif self._match_text_seq("TO", "NEXT", "ROW"): 2160 text += f" TO NEXT ROW" 2161 elif self._match_text_seq("TO", "FIRST"): 2162 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2163 elif self._match_text_seq("TO", "LAST"): 2164 text += f" TO LAST {self._advance_any().text}" # type: ignore 2165 after = exp.var(text) 2166 else: 2167 after = None 2168 2169 if self._match_text_seq("PATTERN"): 2170 self._match_l_paren() 2171 2172 if not self._curr: 2173 self.raise_error("Expecting )", self._curr) 2174 2175 paren = 1 2176 start = self._curr 2177 2178 while self._curr and paren > 0: 2179 if self._curr.token_type == TokenType.L_PAREN: 2180 paren += 1 2181 if self._curr.token_type == TokenType.R_PAREN: 2182 paren -= 1 2183 2184 end = self._prev 2185 self._advance() 2186 2187 if paren > 0: 2188 self.raise_error("Expecting )", self._curr) 2189 2190 pattern = exp.var(self._find_sql(start, end)) 2191 else: 2192 pattern = None 2193 2194 define = ( 2195 self._parse_csv( 2196 lambda: self.expression( 2197 exp.Alias, 2198 alias=self._parse_id_var(any_token=True), 2199 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2200 ) 2201 ) 2202 if self._match_text_seq("DEFINE") 2203 else None 2204 ) 2205 2206 self._match_r_paren() 2207 2208 return self.expression( 2209 exp.MatchRecognize, 2210 partition_by=partition, 2211 order=order, 2212 measures=measures, 2213 
rows=rows, 2214 after=after, 2215 pattern=pattern, 2216 define=define, 2217 alias=self._parse_table_alias(), 2218 ) 2219 2220 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2221 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2222 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2223 2224 if outer_apply or cross_apply: 2225 this = self._parse_select(table=True) 2226 view = None 2227 outer = not cross_apply 2228 elif self._match(TokenType.LATERAL): 2229 this = self._parse_select(table=True) 2230 view = self._match(TokenType.VIEW) 2231 outer = self._match(TokenType.OUTER) 2232 else: 2233 return None 2234 2235 if not this: 2236 this = self._parse_function() or self._parse_id_var(any_token=False) 2237 while self._match(TokenType.DOT): 2238 this = exp.Dot( 2239 this=this, 2240 expression=self._parse_function() or self._parse_id_var(any_token=False), 2241 ) 2242 2243 if view: 2244 table = self._parse_id_var(any_token=False) 2245 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2246 table_alias: t.Optional[exp.TableAlias] = self.expression( 2247 exp.TableAlias, this=table, columns=columns 2248 ) 2249 elif isinstance(this, exp.Subquery) and this.alias: 2250 # Ensures parity between the Subquery's and the Lateral's "alias" args 2251 table_alias = this.args["alias"].copy() 2252 else: 2253 table_alias = self._parse_table_alias() 2254 2255 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2256 2257 def _parse_join_parts( 2258 self, 2259 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2260 return ( 2261 self._match_set(self.JOIN_METHODS) and self._prev, 2262 self._match_set(self.JOIN_SIDES) and self._prev, 2263 self._match_set(self.JOIN_KINDS) and self._prev, 2264 ) 2265 2266 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2267 if self._match(TokenType.COMMA): 2268 return self.expression(exp.Join, 
this=self._parse_table()) 2269 2270 index = self._index 2271 method, side, kind = self._parse_join_parts() 2272 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2273 join = self._match(TokenType.JOIN) 2274 2275 if not skip_join_token and not join: 2276 self._retreat(index) 2277 kind = None 2278 method = None 2279 side = None 2280 2281 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2282 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2283 2284 if not skip_join_token and not join and not outer_apply and not cross_apply: 2285 return None 2286 2287 if outer_apply: 2288 side = Token(TokenType.LEFT, "LEFT") 2289 2290 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2291 2292 if method: 2293 kwargs["method"] = method.text 2294 if side: 2295 kwargs["side"] = side.text 2296 if kind: 2297 kwargs["kind"] = kind.text 2298 if hint: 2299 kwargs["hint"] = hint 2300 2301 if self._match(TokenType.ON): 2302 kwargs["on"] = self._parse_conjunction() 2303 elif self._match(TokenType.USING): 2304 kwargs["using"] = self._parse_wrapped_id_vars() 2305 elif not (kind and kind.token_type == TokenType.CROSS): 2306 index = self._index 2307 joins = self._parse_joins() 2308 2309 if joins and self._match(TokenType.ON): 2310 kwargs["on"] = self._parse_conjunction() 2311 elif joins and self._match(TokenType.USING): 2312 kwargs["using"] = self._parse_wrapped_id_vars() 2313 else: 2314 joins = None 2315 self._retreat(index) 2316 2317 kwargs["this"].set("joins", joins) 2318 2319 return self.expression(exp.Join, **kwargs) 2320 2321 def _parse_index( 2322 self, 2323 index: t.Optional[exp.Expression] = None, 2324 ) -> t.Optional[exp.Index]: 2325 if index: 2326 unique = None 2327 primary = None 2328 amp = None 2329 2330 self._match(TokenType.ON) 2331 self._match(TokenType.TABLE) # hive 2332 table = self._parse_table_parts(schema=True) 2333 else: 2334 unique = self._match(TokenType.UNIQUE) 2335 primary = 
self._match_text_seq("PRIMARY") 2336 amp = self._match_text_seq("AMP") 2337 2338 if not self._match(TokenType.INDEX): 2339 return None 2340 2341 index = self._parse_id_var() 2342 table = None 2343 2344 using = self._parse_field() if self._match(TokenType.USING) else None 2345 2346 if self._match(TokenType.L_PAREN, advance=False): 2347 columns = self._parse_wrapped_csv(self._parse_ordered) 2348 else: 2349 columns = None 2350 2351 return self.expression( 2352 exp.Index, 2353 this=index, 2354 table=table, 2355 using=using, 2356 columns=columns, 2357 unique=unique, 2358 primary=primary, 2359 amp=amp, 2360 partition_by=self._parse_partition_by(), 2361 ) 2362 2363 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2364 hints: t.List[exp.Expression] = [] 2365 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2366 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2367 hints.append( 2368 self.expression( 2369 exp.WithTableHint, 2370 expressions=self._parse_csv( 2371 lambda: self._parse_function() or self._parse_var(any_token=True) 2372 ), 2373 ) 2374 ) 2375 self._match_r_paren() 2376 else: 2377 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2378 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2379 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2380 2381 self._match_texts({"INDEX", "KEY"}) 2382 if self._match(TokenType.FOR): 2383 hint.set("target", self._advance_any() and self._prev.text.upper()) 2384 2385 hint.set("expressions", self._parse_wrapped_id_vars()) 2386 hints.append(hint) 2387 2388 return hints or None 2389 2390 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2391 return ( 2392 (not schema and self._parse_function(optional_parens=False)) 2393 or self._parse_id_var(any_token=False) 2394 or self._parse_string_as_identifier() 2395 or self._parse_placeholder() 2396 ) 2397 2398 def _parse_table_parts(self, schema: bool = False) -> 
exp.Table: 2399 catalog = None 2400 db = None 2401 table = self._parse_table_part(schema=schema) 2402 2403 while self._match(TokenType.DOT): 2404 if catalog: 2405 # This allows nesting the table in arbitrarily many dot expressions if needed 2406 table = self.expression( 2407 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2408 ) 2409 else: 2410 catalog = db 2411 db = table 2412 table = self._parse_table_part(schema=schema) 2413 2414 if not table: 2415 self.raise_error(f"Expected table name but got {self._curr}") 2416 2417 return self.expression( 2418 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2419 ) 2420 2421 def _parse_table( 2422 self, 2423 schema: bool = False, 2424 joins: bool = False, 2425 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2426 ) -> t.Optional[exp.Expression]: 2427 lateral = self._parse_lateral() 2428 if lateral: 2429 return lateral 2430 2431 unnest = self._parse_unnest() 2432 if unnest: 2433 return unnest 2434 2435 values = self._parse_derived_table_values() 2436 if values: 2437 return values 2438 2439 subquery = self._parse_select(table=True) 2440 if subquery: 2441 if not subquery.args.get("pivots"): 2442 subquery.set("pivots", self._parse_pivots()) 2443 return subquery 2444 2445 this: exp.Expression = self._parse_table_parts(schema=schema) 2446 2447 if schema: 2448 return self._parse_schema(this=this) 2449 2450 if self.ALIAS_POST_TABLESAMPLE: 2451 table_sample = self._parse_table_sample() 2452 2453 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2454 if alias: 2455 this.set("alias", alias) 2456 2457 if not this.args.get("pivots"): 2458 this.set("pivots", self._parse_pivots()) 2459 2460 this.set("hints", self._parse_table_hints()) 2461 2462 if not self.ALIAS_POST_TABLESAMPLE: 2463 table_sample = self._parse_table_sample() 2464 2465 if table_sample: 2466 table_sample.set("this", this) 2467 this = table_sample 2468 2469 if joins: 2470 for join in 
iter(self._parse_join, None): 2471 this.append("joins", join) 2472 2473 return this 2474 2475 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2476 if not self._match(TokenType.UNNEST): 2477 return None 2478 2479 expressions = self._parse_wrapped_csv(self._parse_type) 2480 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2481 2482 alias = self._parse_table_alias() if with_alias else None 2483 2484 if alias and self.UNNEST_COLUMN_ONLY: 2485 if alias.args.get("columns"): 2486 self.raise_error("Unexpected extra column alias in unnest.") 2487 2488 alias.set("columns", [alias.this]) 2489 alias.set("this", None) 2490 2491 offset = None 2492 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2493 self._match(TokenType.ALIAS) 2494 offset = self._parse_id_var() or exp.to_identifier("offset") 2495 2496 return self.expression( 2497 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2498 ) 2499 2500 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2501 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2502 if not is_derived and not self._match(TokenType.VALUES): 2503 return None 2504 2505 expressions = self._parse_csv(self._parse_value) 2506 alias = self._parse_table_alias() 2507 2508 if is_derived: 2509 self._match_r_paren() 2510 2511 return self.expression( 2512 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2513 ) 2514 2515 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2516 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2517 as_modifier and self._match_text_seq("USING", "SAMPLE") 2518 ): 2519 return None 2520 2521 bucket_numerator = None 2522 bucket_denominator = None 2523 bucket_field = None 2524 percent = None 2525 rows = None 2526 size = None 2527 seed = None 2528 2529 kind = ( 2530 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2531 ) 
2532 method = self._parse_var(tokens=(TokenType.ROW,)) 2533 2534 self._match(TokenType.L_PAREN) 2535 2536 num = self._parse_number() 2537 2538 if self._match_text_seq("BUCKET"): 2539 bucket_numerator = self._parse_number() 2540 self._match_text_seq("OUT", "OF") 2541 bucket_denominator = bucket_denominator = self._parse_number() 2542 self._match(TokenType.ON) 2543 bucket_field = self._parse_field() 2544 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2545 percent = num 2546 elif self._match(TokenType.ROWS): 2547 rows = num 2548 else: 2549 size = num 2550 2551 self._match(TokenType.R_PAREN) 2552 2553 if self._match(TokenType.L_PAREN): 2554 method = self._parse_var() 2555 seed = self._match(TokenType.COMMA) and self._parse_number() 2556 self._match_r_paren() 2557 elif self._match_texts(("SEED", "REPEATABLE")): 2558 seed = self._parse_wrapped(self._parse_number) 2559 2560 return self.expression( 2561 exp.TableSample, 2562 method=method, 2563 bucket_numerator=bucket_numerator, 2564 bucket_denominator=bucket_denominator, 2565 bucket_field=bucket_field, 2566 percent=percent, 2567 rows=rows, 2568 size=size, 2569 seed=seed, 2570 kind=kind, 2571 ) 2572 2573 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2574 return list(iter(self._parse_pivot, None)) or None 2575 2576 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2577 return list(iter(self._parse_join, None)) or None 2578 2579 # https://duckdb.org/docs/sql/statements/pivot 2580 def _parse_simplified_pivot(self) -> exp.Pivot: 2581 def _parse_on() -> t.Optional[exp.Expression]: 2582 this = self._parse_bitwise() 2583 return self._parse_in(this) if self._match(TokenType.IN) else this 2584 2585 this = self._parse_table() 2586 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2587 using = self._match(TokenType.USING) and self._parse_csv( 2588 lambda: self._parse_alias(self._parse_function()) 2589 ) 2590 group = self._parse_group() 2591 return self.expression( 2592 exp.Pivot, 
this=this, expressions=expressions, using=using, group=group 2593 ) 2594 2595 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2596 index = self._index 2597 2598 if self._match(TokenType.PIVOT): 2599 unpivot = False 2600 elif self._match(TokenType.UNPIVOT): 2601 unpivot = True 2602 else: 2603 return None 2604 2605 expressions = [] 2606 field = None 2607 2608 if not self._match(TokenType.L_PAREN): 2609 self._retreat(index) 2610 return None 2611 2612 if unpivot: 2613 expressions = self._parse_csv(self._parse_column) 2614 else: 2615 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2616 2617 if not expressions: 2618 self.raise_error("Failed to parse PIVOT's aggregation list") 2619 2620 if not self._match(TokenType.FOR): 2621 self.raise_error("Expecting FOR") 2622 2623 value = self._parse_column() 2624 2625 if not self._match(TokenType.IN): 2626 self.raise_error("Expecting IN") 2627 2628 field = self._parse_in(value, alias=True) 2629 2630 self._match_r_paren() 2631 2632 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2633 2634 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2635 pivot.set("alias", self._parse_table_alias()) 2636 2637 if not unpivot: 2638 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2639 2640 columns: t.List[exp.Expression] = [] 2641 for fld in pivot.args["field"].expressions: 2642 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2643 for name in names: 2644 if self.PREFIXED_PIVOT_COLUMNS: 2645 name = f"{name}_{field_name}" if name else field_name 2646 else: 2647 name = f"{field_name}_{name}" if name else field_name 2648 2649 columns.append(exp.to_identifier(name)) 2650 2651 pivot.set("columns", columns) 2652 2653 return pivot 2654 2655 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2656 return [agg.alias for agg in aggregations] 2657 2658 def 
_parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2659 if not skip_where_token and not self._match(TokenType.WHERE): 2660 return None 2661 2662 return self.expression( 2663 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2664 ) 2665 2666 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2667 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2668 return None 2669 2670 elements = defaultdict(list) 2671 2672 if self._match(TokenType.ALL): 2673 return self.expression(exp.Group, all=True) 2674 2675 while True: 2676 expressions = self._parse_csv(self._parse_conjunction) 2677 if expressions: 2678 elements["expressions"].extend(expressions) 2679 2680 grouping_sets = self._parse_grouping_sets() 2681 if grouping_sets: 2682 elements["grouping_sets"].extend(grouping_sets) 2683 2684 rollup = None 2685 cube = None 2686 totals = None 2687 2688 with_ = self._match(TokenType.WITH) 2689 if self._match(TokenType.ROLLUP): 2690 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2691 elements["rollup"].extend(ensure_list(rollup)) 2692 2693 if self._match(TokenType.CUBE): 2694 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2695 elements["cube"].extend(ensure_list(cube)) 2696 2697 if self._match_text_seq("TOTALS"): 2698 totals = True 2699 elements["totals"] = True # type: ignore 2700 2701 if not (grouping_sets or rollup or cube or totals): 2702 break 2703 2704 return self.expression(exp.Group, **elements) # type: ignore 2705 2706 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2707 if not self._match(TokenType.GROUPING_SETS): 2708 return None 2709 2710 return self._parse_wrapped_csv(self._parse_grouping_set) 2711 2712 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2713 if self._match(TokenType.L_PAREN): 2714 grouping_set = self._parse_csv(self._parse_column) 2715 self._match_r_paren() 2716 return 
self.expression(exp.Tuple, expressions=grouping_set) 2717 2718 return self._parse_column() 2719 2720 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2721 if not skip_having_token and not self._match(TokenType.HAVING): 2722 return None 2723 return self.expression(exp.Having, this=self._parse_conjunction()) 2724 2725 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2726 if not self._match(TokenType.QUALIFY): 2727 return None 2728 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2729 2730 def _parse_order( 2731 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2732 ) -> t.Optional[exp.Expression]: 2733 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2734 return this 2735 2736 return self.expression( 2737 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2738 ) 2739 2740 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2741 if not self._match(token): 2742 return None 2743 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2744 2745 def _parse_ordered(self) -> exp.Ordered: 2746 this = self._parse_conjunction() 2747 self._match(TokenType.ASC) 2748 2749 is_desc = self._match(TokenType.DESC) 2750 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2751 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2752 desc = is_desc or False 2753 asc = not desc 2754 nulls_first = is_nulls_first or False 2755 explicitly_null_ordered = is_nulls_first or is_nulls_last 2756 2757 if ( 2758 not explicitly_null_ordered 2759 and ( 2760 (asc and self.NULL_ORDERING == "nulls_are_small") 2761 or (desc and self.NULL_ORDERING != "nulls_are_small") 2762 ) 2763 and self.NULL_ORDERING != "nulls_are_last" 2764 ): 2765 nulls_first = True 2766 2767 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2768 2769 def _parse_limit( 2770 self, this: t.Optional[exp.Expression] = None, 
top: bool = False 2771 ) -> t.Optional[exp.Expression]: 2772 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2773 limit_paren = self._match(TokenType.L_PAREN) 2774 expression = self._parse_number() if top else self._parse_term() 2775 2776 if self._match(TokenType.COMMA): 2777 offset = expression 2778 expression = self._parse_term() 2779 else: 2780 offset = None 2781 2782 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2783 2784 if limit_paren: 2785 self._match_r_paren() 2786 2787 return limit_exp 2788 2789 if self._match(TokenType.FETCH): 2790 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2791 direction = self._prev.text if direction else "FIRST" 2792 2793 count = self._parse_number() 2794 percent = self._match(TokenType.PERCENT) 2795 2796 self._match_set((TokenType.ROW, TokenType.ROWS)) 2797 2798 only = self._match_text_seq("ONLY") 2799 with_ties = self._match_text_seq("WITH", "TIES") 2800 2801 if only and with_ties: 2802 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2803 2804 return self.expression( 2805 exp.Fetch, 2806 direction=direction, 2807 count=count, 2808 percent=percent, 2809 with_ties=with_ties, 2810 ) 2811 2812 return this 2813 2814 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2815 if not self._match(TokenType.OFFSET): 2816 return this 2817 2818 count = self._parse_number() 2819 self._match_set((TokenType.ROW, TokenType.ROWS)) 2820 return self.expression(exp.Offset, this=this, expression=count) 2821 2822 def _parse_locks(self) -> t.List[exp.Lock]: 2823 locks = [] 2824 while True: 2825 if self._match_text_seq("FOR", "UPDATE"): 2826 update = True 2827 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2828 "LOCK", "IN", "SHARE", "MODE" 2829 ): 2830 update = False 2831 else: 2832 break 2833 2834 expressions = None 2835 if self._match_text_seq("OF"): 2836 expressions = self._parse_csv(lambda: 
self._parse_table(schema=True)) 2837 2838 wait: t.Optional[bool | exp.Expression] = None 2839 if self._match_text_seq("NOWAIT"): 2840 wait = True 2841 elif self._match_text_seq("WAIT"): 2842 wait = self._parse_primary() 2843 elif self._match_text_seq("SKIP", "LOCKED"): 2844 wait = False 2845 2846 locks.append( 2847 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2848 ) 2849 2850 return locks 2851 2852 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2853 if not self._match_set(self.SET_OPERATIONS): 2854 return this 2855 2856 token_type = self._prev.token_type 2857 2858 if token_type == TokenType.UNION: 2859 expression = exp.Union 2860 elif token_type == TokenType.EXCEPT: 2861 expression = exp.Except 2862 else: 2863 expression = exp.Intersect 2864 2865 return self.expression( 2866 expression, 2867 this=this, 2868 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2869 expression=self._parse_set_operations(self._parse_select(nested=True)), 2870 ) 2871 2872 def _parse_expression(self) -> t.Optional[exp.Expression]: 2873 return self._parse_alias(self._parse_conjunction()) 2874 2875 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2876 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2877 2878 def _parse_equality(self) -> t.Optional[exp.Expression]: 2879 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2880 2881 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2882 return self._parse_tokens(self._parse_range, self.COMPARISON) 2883 2884 def _parse_range(self) -> t.Optional[exp.Expression]: 2885 this = self._parse_bitwise() 2886 negate = self._match(TokenType.NOT) 2887 2888 if self._match_set(self.RANGE_PARSERS): 2889 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2890 if not expression: 2891 return this 2892 2893 this = expression 2894 elif self._match(TokenType.ISNULL): 2895 this = 
def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse what follows an ``IS`` keyword.

    Handles ``IS [NOT] DISTINCT FROM <expr>`` as well as ``IS [NOT]`` followed by
    whatever `_parse_null` / `_parse_boolean` accept.

    Args:
        this: The left-hand operand.

    Returns:
        The resulting node, or None after rewinding when no valid operand follows.
    """
    # One token before the current position, i.e. the IS token when the caller
    # has just matched it (as _parse_range does).
    start = self._index - 1
    negated = self._match(TokenType.NOT)

    if self._match_text_seq("DISTINCT", "FROM"):
        # IS NOT DISTINCT FROM is a null-safe equality; IS DISTINCT FROM is its negation.
        if negated:
            node_class = exp.NullSafeEQ
        else:
            node_class = exp.NullSafeNEQ
        return self.expression(node_class, this=this, expression=self._parse_expression())

    operand = self._parse_null() or self._parse_boolean()
    if operand:
        is_node = self.expression(exp.Is, this=this, expression=operand)
        if negated:
            return self.expression(exp.Not, this=is_node)
        return is_node

    self._retreat(start)
    return None
def _parse_bitwise(self) -> t.Optional[exp.Expression]:
    """Parse a left-associative chain of bitwise operators and shifts.

    Operands are parsed with `_parse_term`. Operators come from the `BITWISE`
    mapping; ``<<`` and ``>>`` are recognised as two consecutive LT / GT tokens.

    Returns:
        The folded expression, or the lone term when no operator follows.
    """
    left = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            node_class = self.BITWISE[self._prev.token_type]
        elif self._match_pair(TokenType.LT, TokenType.LT):
            node_class = exp.BitwiseLeftShift
        elif self._match_pair(TokenType.GT, TokenType.GT):
            node_class = exp.BitwiseRightShift
        else:
            return left

        left = self.expression(node_class, this=left, expression=self._parse_term())
def _parse_type(self) -> t.Optional[exp.Expression]:
    """Parse an interval, a type-prefixed literal / cast, a data type, or a column.

    Returns:
        - the interval, when `_parse_interval` yields one;
        - for ``<type> <literal>``: the result of the matching `TYPE_LITERAL_PARSERS`
          entry, or an `exp.Cast` of the literal to the type;
        - for a type with arguments not followed by a literal: the type itself,
          passed through `_parse_column_ops`;
        - otherwise whatever `_parse_column` returns.
    """
    interval = self._parse_interval()
    if interval:
        return interval

    start = self._index
    data_type = self._parse_types(check_func=True)
    operand = self._parse_column()

    if not data_type:
        return operand

    if isinstance(operand, exp.Literal):
        literal_parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
        if literal_parser:
            return literal_parser(self, operand, data_type)
        return self.expression(exp.Cast, this=operand, to=data_type)

    if data_type.expressions:
        return self._parse_column_ops(data_type)

    # A bare type name without arguments and without a literal after it:
    # rewind and parse the same tokens as a column instead.
    self._retreat(start)
    return self._parse_column()
self._match(TokenType.L_PAREN): 3069 if is_struct: 3070 expressions = self._parse_csv(self._parse_struct_types) 3071 elif nested: 3072 expressions = self._parse_csv( 3073 lambda: self._parse_types(check_func=check_func, schema=schema) 3074 ) 3075 elif type_token in self.ENUM_TYPE_TOKENS: 3076 expressions = self._parse_csv(self._parse_primary) 3077 else: 3078 expressions = self._parse_csv(self._parse_type_size) 3079 3080 if not expressions or not self._match(TokenType.R_PAREN): 3081 self._retreat(index) 3082 return None 3083 3084 maybe_func = True 3085 3086 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3087 this = exp.DataType( 3088 this=exp.DataType.Type.ARRAY, 3089 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3090 nested=True, 3091 ) 3092 3093 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3094 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3095 3096 return this 3097 3098 if self._match(TokenType.L_BRACKET): 3099 self._retreat(index) 3100 return None 3101 3102 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3103 if nested and self._match(TokenType.LT): 3104 if is_struct: 3105 expressions = self._parse_csv(self._parse_struct_types) 3106 else: 3107 expressions = self._parse_csv( 3108 lambda: self._parse_types(check_func=check_func, schema=schema) 3109 ) 3110 3111 if not self._match(TokenType.GT): 3112 self.raise_error("Expecting >") 3113 3114 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3115 values = self._parse_csv(self._parse_conjunction) 3116 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3117 3118 value: t.Optional[exp.Expression] = None 3119 if type_token in self.TIMESTAMPS: 3120 if self._match_text_seq("WITH", "TIME", "ZONE"): 3121 maybe_func = False 3122 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3123 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3124 maybe_func = 
def _parse_column(self) -> t.Optional[exp.Expression]:
    """Parse a field and any column operators that follow it.

    Returns:
        The parsed field (an identifier is wrapped in `exp.Column`) after
        `_parse_column_ops` has been applied. When no field is found, the
        result of `_parse_bracket` is returned directly.
    """
    field = self._parse_field()

    if isinstance(field, exp.Identifier):
        return self._parse_column_ops(self.expression(exp.Column, this=field))

    if not field:
        return self._parse_bracket(field)

    return self._parse_column_ops(field)
def _parse_primary(self) -> t.Optional[exp.Expression]:
    """Parse a primary expression at the current token.

    Three forms are recognised, tried in this order:

    1. a token handled by `PRIMARY_PARSERS`;
    2. a DOT token immediately followed by a NUMBER token;
    3. a parenthesized query, tuple or single expression.

    Returns:
        The parsed expression, or None when none of the forms applies.
    """
    if self._match_set(self.PRIMARY_PARSERS):
        token_type = self._prev.token_type
        primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

        if token_type == TokenType.STRING:
            # Consecutive string tokens are gathered and, when there is more
            # than one, returned as a single Concat node.
            expressions = [primary]
            while self._match(TokenType.STRING):
                expressions.append(exp.Literal.string(self._prev.text))

            if len(expressions) > 1:
                return self.expression(exp.Concat, expressions=expressions)

        return primary

    # A number written without its integer part: the literal is built with a "0." prefix.
    if self._match_pair(TokenType.DOT, TokenType.NUMBER):
        return exp.Literal.number(f"0.{self._prev.text}")

    if self._match(TokenType.L_PAREN):
        # Captured right after matching "(" and attached to the resulting node below.
        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if isinstance(this, exp.Subqueryable):
            this = self._parse_set_operations(
                self._parse_subquery(this=this, parse_alias=False)
            )
        elif len(expressions) > 1:
            # Several comma-separated expressions inside the parentheses form a tuple.
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=self._parse_set_operations(this))

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    return None
def _parse_user_defined_function(
    self, kind: t.Optional[TokenType] = None
) -> t.Optional[exp.Expression]:
    """Parse a (possibly dotted) function name with an optional parameter list.

    Args:
        kind: Not used by this implementation.

    Returns:
        The name alone (an identifier or nested `exp.Dot` chain) when no opening
        parenthesis follows, otherwise an `exp.UserDefinedFunction` carrying the
        parsed parameters and `wrapped=True`.
    """
    name = self._parse_id_var()

    # Fold "a.b.c" into nested Dot nodes, left to right.
    while self._match(TokenType.DOT):
        name = self.expression(exp.Dot, this=name, expression=self._parse_id_var())

    if self._match(TokenType.L_PAREN):
        parameters = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=name, expressions=parameters, wrapped=True
        )

    return name
def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
    """Parse a parenthesized list of constraints / column definitions into an `exp.Schema`.

    Args:
        this: The expression the schema is attached to.

    Returns:
        `this` unchanged when a nested select can be parsed at the current
        position or when no opening parenthesis follows; otherwise an
        `exp.Schema` wrapping `this` and the parsed entries.
    """
    index = self._index

    # Speculative parse, attempted only when no errors have been recorded so far.
    # NOTE(review): presumably this keeps a parenthesized subquery from being
    # read as a column list - confirm against callers.
    if not self.errors:
        try:
            if self._parse_select(nested=True):
                return this
        except ParseError:
            pass
        finally:
            # Runs on every path, including the early return above: errors
            # recorded during the attempt are dropped and the cursor is rewound.
            self.errors.clear()
            self._retreat(index)

    if not self._match(TokenType.L_PAREN):
        return this

    # Each entry is a constraint if one parses, else a column definition.
    args = self._parse_csv(
        lambda: self._parse_constraint()
        or self._parse_column_def(self._parse_field(any_token=True))
    )

    self._match_r_paren()
    return self.expression(exp.Schema, this=this, expressions=args)
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse the optional arguments of an auto-increment column constraint.

    Two spellings are accepted: a wrapped list ``(start, increment)`` and the
    keyword form ``START <expr> [INCREMENT] <expr>``.

    Returns:
        An `exp.GeneratedAsIdentityColumnConstraint` when both a start and an
        increment were parsed, otherwise a plain `exp.AutoIncrementColumnConstraint`.
    """
    if self._match(TokenType.L_PAREN, advance=False):
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        start, increment = seq_get(bounds, 0), seq_get(bounds, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()
    else:
        start = increment = None

    if not (start and increment):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
def _parse_constraint(self) -> t.Optional[exp.Expression]:
    """Parse a schema-level constraint, named or unnamed.

    Returns:
        Without a leading CONSTRAINT token: the result of
        `_parse_unnamed_constraint` restricted to `SCHEMA_UNNAMED_CONSTRAINTS`.
        With one: an `exp.Constraint` holding the constraint name and every
        unnamed constraint or function call that follows it.
    """
    if not self._match(TokenType.CONSTRAINT):
        return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

    name = self._parse_id_var()

    parts = []
    part = self._parse_unnamed_constraint() or self._parse_function()
    while part:
        parts.append(part)
        part = self._parse_unnamed_constraint() or self._parse_function()

    return self.expression(exp.Constraint, this=name, expressions=parts)
def _parse_key_constraint_options(self) -> t.List[str]:
    """Collect the option clauses that may follow a key constraint.

    Recognised clauses are ``ON <event> <action>`` (action being NO ACTION,
    CASCADE, SET NULL or SET DEFAULT) and the keyword options listed in
    `keyword_options` below. Collection stops at the first token that starts
    none of them, or at the end of input.

    Returns:
        The options as normalized strings, in the order they were encountered.
    """
    keyword_options = (
        ("NOT", "ENFORCED"),
        ("DEFERRABLE",),
        ("INITIALLY", "DEFERRED"),
        ("NORELY",),
        ("MATCH", "FULL"),
    )

    collected = []
    while self._curr:
        if self._match(TokenType.ON):
            action = None
            # The token right after ON (whatever it is) names the triggering event.
            event = self._advance_any() and self._prev.text

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match_text_seq("CASCADE"):
                action = "CASCADE"
            elif self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

            collected.append(f"ON {event} {action}")
            continue

        for words in keyword_options:
            if self._match_text_seq(*words):
                collected.append(" ".join(words))
                break
        else:
            # Nothing recognised at the current token: stop collecting.
            break

    return collected
def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    """Parse a bracketed (``[...]``) or braced (``{...}``) suffix, if one follows.

    Depending on the opening token and on `this`, the result is:

    - an `exp.Struct` for ``{...}``;
    - an `exp.Array` for ``[...]`` when there is no `this` or it is named ARRAY;
    - an `exp.Bracket` (subscript / slice on `this`) otherwise, with the indices
      shifted by ``-INDEX_OFFSET``.

    Further bracket suffixes are handled by recursion, so ``x[0][1]`` nests.

    Args:
        this: The expression being subscripted, or None for a standalone literal.

    Returns:
        `this` unchanged when no opening bracket / brace follows, otherwise the
        node described above.

    Raises:
        ParseError: Through `raise_error`, when the closing delimiter matching
            the opening one is missing.
    """
    if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
        return this

    bracket_kind = self._prev.token_type

    if self._match(TokenType.COLON):
        # A leading colon means a slice with no lower bound.
        expressions: t.List[t.Optional[exp.Expression]] = [
            self.expression(exp.Slice, expression=self._parse_conjunction())
        ]
    else:
        expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

    # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
    if bracket_kind == TokenType.L_BRACE:
        this = self.expression(exp.Struct, expressions=expressions)
    elif not this or this.name.upper() == "ARRAY":
        this = self.expression(exp.Array, expressions=expressions)
    else:
        expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
        this = self.expression(exp.Bracket, this=this, expressions=expressions)

    # Only try to consume the delimiter that pairs with the opener. Attempting
    # both matches unconditionally would swallow a "}" that follows "]" (breaking
    # e.g. {'a': x[0]}) and would accept a stray "]" before "}".
    if bracket_kind == TokenType.L_BRACKET:
        if not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
    elif not self._match(TokenType.R_BRACE):
        self.raise_error("Expected }")

    self._add_comments(this)
    return self._parse_bracket(this)
def _parse_any_value(self) -> exp.AnyValue:
    """Parse the arguments of ``ANY_VALUE(<expr> [HAVING {MAX | MIN} <column>])``.

    Returns:
        An `exp.AnyValue` node. `having` and `max` are None when there is no
        HAVING clause; otherwise `max` is True for MAX and False for anything else.
    """
    this = self._parse_lambda()
    is_max = None
    having = None

    if self._match(TokenType.HAVING):
        self._match_texts(("MAX", "MIN"))
        # Compare case-insensitively: the token text keeps the casing used in the
        # query, so a lower-case "max" must not be mistaken for MIN.
        is_max = self._prev.text.upper() == "MAX"
        having = self._parse_column()

    return self.expression(exp.AnyValue, this=this, having=having, max=is_max)
def _parse_concat(self) -> t.Optional[exp.Expression]:
    """Parse the argument list of a CONCAT call.

    When `CONCAT_NULL_OUTPUTS_STRING` is set, missing arguments are dropped and
    every remaining one is rewritten as ``COALESCE(CAST(arg AS text), '')``.

    Returns:
        The sole argument when exactly one remains, otherwise an `exp.Concat`
        (if `STRICT_STRING_CONCAT` is set) or `exp.SafeConcat` over all arguments.
    """
    args = self._parse_csv(self._parse_conjunction)

    if self.CONCAT_NULL_OUTPUTS_STRING:
        coalesced = []
        for arg in args:
            if arg:
                coalesced.append(
                    exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                )
        args = coalesced

    # A single-argument CONCAT is rejected by some dialects (e.g. Trino), so it
    # is collapsed into the argument itself.
    if len(args) == 1:
        return args[0]

    if self.STRICT_STRING_CONCAT:
        concat_class = exp.Concat
    else:
        concat_class = exp.SafeConcat

    return self.expression(concat_class, expressions=args)
3837 if not self._match_text_seq("WITHIN", "GROUP"): 3838 self._retreat(index) 3839 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3840 3841 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3842 order = self._parse_order(this=seq_get(args, 0)) 3843 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3844 3845 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3846 this = self._parse_bitwise() 3847 3848 if self._match(TokenType.USING): 3849 to: t.Optional[exp.Expression] = self.expression( 3850 exp.CharacterSet, this=self._parse_var() 3851 ) 3852 elif self._match(TokenType.COMMA): 3853 to = self._parse_types() 3854 else: 3855 to = None 3856 3857 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3858 3859 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3860 """ 3861 There are generally two variants of the DECODE function: 3862 3863 - DECODE(bin, charset) 3864 - DECODE(expression, search, result [, search, result] ... [, default]) 3865 3866 The second variant will always be parsed into a CASE expression. Note that NULL 3867 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3868 instead of relying on pattern matching. 
3869 """ 3870 args = self._parse_csv(self._parse_conjunction) 3871 3872 if len(args) < 3: 3873 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3874 3875 expression, *expressions = args 3876 if not expression: 3877 return None 3878 3879 ifs = [] 3880 for search, result in zip(expressions[::2], expressions[1::2]): 3881 if not search or not result: 3882 return None 3883 3884 if isinstance(search, exp.Literal): 3885 ifs.append( 3886 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3887 ) 3888 elif isinstance(search, exp.Null): 3889 ifs.append( 3890 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3891 ) 3892 else: 3893 cond = exp.or_( 3894 exp.EQ(this=expression.copy(), expression=search), 3895 exp.and_( 3896 exp.Is(this=expression.copy(), expression=exp.Null()), 3897 exp.Is(this=search.copy(), expression=exp.Null()), 3898 copy=False, 3899 ), 3900 copy=False, 3901 ) 3902 ifs.append(exp.If(this=cond, true=result)) 3903 3904 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3905 3906 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3907 self._match_text_seq("KEY") 3908 key = self._parse_field() 3909 self._match(TokenType.COLON) 3910 self._match_text_seq("VALUE") 3911 value = self._parse_field() 3912 3913 if not key and not value: 3914 return None 3915 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3916 3917 def _parse_json_object(self) -> exp.JSONObject: 3918 star = self._parse_star() 3919 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3920 3921 null_handling = None 3922 if self._match_text_seq("NULL", "ON", "NULL"): 3923 null_handling = "NULL ON NULL" 3924 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3925 null_handling = "ABSENT ON NULL" 3926 3927 unique_keys = None 3928 if self._match_text_seq("WITH", "UNIQUE"): 3929 unique_keys = True 3930 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3931 unique_keys = False 3932 3933 self._match_text_seq("KEYS") 3934 3935 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3936 format_json = self._match_text_seq("FORMAT", "JSON") 3937 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3938 3939 return self.expression( 3940 exp.JSONObject, 3941 expressions=expressions, 3942 null_handling=null_handling, 3943 unique_keys=unique_keys, 3944 return_type=return_type, 3945 format_json=format_json, 3946 encoding=encoding, 3947 ) 3948 3949 def _parse_logarithm(self) -> exp.Func: 3950 # Default argument order is base, expression 3951 args = self._parse_csv(self._parse_range) 3952 3953 if len(args) > 1: 3954 if not self.LOG_BASE_FIRST: 3955 args.reverse() 3956 return exp.Log.from_arg_list(args) 3957 3958 return self.expression( 3959 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3960 ) 3961 3962 def _parse_match_against(self) -> exp.MatchAgainst: 3963 expressions = self._parse_csv(self._parse_column) 3964 3965 self._match_text_seq(")", "AGAINST", "(") 3966 3967 this = self._parse_string() 3968 3969 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3970 modifier = "IN NATURAL LANGUAGE MODE" 3971 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3972 modifier = f"{modifier} WITH QUERY EXPANSION" 3973 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3974 modifier = "IN BOOLEAN MODE" 3975 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3976 modifier = "WITH QUERY EXPANSION" 3977 else: 3978 modifier = None 3979 3980 return self.expression( 3981 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3982 ) 3983 3984 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3985 def _parse_open_json(self) -> exp.OpenJSON: 3986 this = self._parse_bitwise() 3987 path = self._match(TokenType.COMMA) and self._parse_string() 3988 3989 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 3990 this = self._parse_field(any_token=True) 3991 kind = self._parse_types() 3992 path = self._parse_string() 3993 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3994 3995 return self.expression( 3996 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3997 ) 3998 3999 expressions = None 4000 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4001 self._match_l_paren() 4002 expressions = self._parse_csv(_parse_open_json_column_def) 4003 4004 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4005 4006 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4007 args = self._parse_csv(self._parse_bitwise) 4008 4009 if self._match(TokenType.IN): 4010 return self.expression( 4011 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4012 ) 4013 4014 if haystack_first: 4015 haystack = seq_get(args, 0) 4016 needle = seq_get(args, 1) 4017 else: 4018 needle = seq_get(args, 0) 4019 haystack = seq_get(args, 1) 4020 4021 return self.expression( 4022 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4023 ) 4024 4025 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4026 args = self._parse_csv(self._parse_table) 4027 return exp.JoinHint(this=func_name.upper(), expressions=args) 4028 4029 def _parse_substring(self) -> exp.Substring: 4030 # Postgres supports the form: substring(string [from int] [for int]) 4031 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4032 4033 args = self._parse_csv(self._parse_bitwise) 4034 4035 if self._match(TokenType.FROM): 4036 args.append(self._parse_bitwise()) 4037 if self._match(TokenType.FOR): 4038 args.append(self._parse_bitwise()) 4039 4040 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4041 4042 def _parse_trim(self) -> exp.Trim: 4043 # https://www.w3resource.com/sql/character-functions/trim.php 4044 
# https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4045 4046 position = None 4047 collation = None 4048 4049 if self._match_texts(self.TRIM_TYPES): 4050 position = self._prev.text.upper() 4051 4052 expression = self._parse_bitwise() 4053 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4054 this = self._parse_bitwise() 4055 else: 4056 this = expression 4057 expression = None 4058 4059 if self._match(TokenType.COLLATE): 4060 collation = self._parse_bitwise() 4061 4062 return self.expression( 4063 exp.Trim, this=this, position=position, expression=expression, collation=collation 4064 ) 4065 4066 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4067 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4068 4069 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4070 return self._parse_window(self._parse_id_var(), alias=True) 4071 4072 def _parse_respect_or_ignore_nulls( 4073 self, this: t.Optional[exp.Expression] 4074 ) -> t.Optional[exp.Expression]: 4075 if self._match_text_seq("IGNORE", "NULLS"): 4076 return self.expression(exp.IgnoreNulls, this=this) 4077 if self._match_text_seq("RESPECT", "NULLS"): 4078 return self.expression(exp.RespectNulls, this=this) 4079 return this 4080 4081 def _parse_window( 4082 self, this: t.Optional[exp.Expression], alias: bool = False 4083 ) -> t.Optional[exp.Expression]: 4084 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4085 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 4086 self._match_r_paren() 4087 4088 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
4089 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4090 if self._match_text_seq("WITHIN", "GROUP"): 4091 order = self._parse_wrapped(self._parse_order) 4092 this = self.expression(exp.WithinGroup, this=this, expression=order) 4093 4094 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4095 # Some dialects choose to implement and some do not. 4096 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4097 4098 # There is some code above in _parse_lambda that handles 4099 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4100 4101 # The below changes handle 4102 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4103 4104 # Oracle allows both formats 4105 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4106 # and Snowflake chose to do the same for familiarity 4107 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4108 this = self._parse_respect_or_ignore_nulls(this) 4109 4110 # bigquery select from window x AS (partition by ...) 
4111 if alias: 4112 over = None 4113 self._match(TokenType.ALIAS) 4114 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4115 return this 4116 else: 4117 over = self._prev.text.upper() 4118 4119 if not self._match(TokenType.L_PAREN): 4120 return self.expression( 4121 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4122 ) 4123 4124 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4125 4126 first = self._match(TokenType.FIRST) 4127 if self._match_text_seq("LAST"): 4128 first = False 4129 4130 partition = self._parse_partition_by() 4131 order = self._parse_order() 4132 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4133 4134 if kind: 4135 self._match(TokenType.BETWEEN) 4136 start = self._parse_window_spec() 4137 self._match(TokenType.AND) 4138 end = self._parse_window_spec() 4139 4140 spec = self.expression( 4141 exp.WindowSpec, 4142 kind=kind, 4143 start=start["value"], 4144 start_side=start["side"], 4145 end=end["value"], 4146 end_side=end["side"], 4147 ) 4148 else: 4149 spec = None 4150 4151 self._match_r_paren() 4152 4153 return self.expression( 4154 exp.Window, 4155 this=this, 4156 partition_by=partition, 4157 order=order, 4158 spec=spec, 4159 alias=window_alias, 4160 over=over, 4161 first=first, 4162 ) 4163 4164 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4165 self._match(TokenType.BETWEEN) 4166 4167 return { 4168 "value": ( 4169 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4170 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4171 or self._parse_bitwise() 4172 ), 4173 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4174 } 4175 4176 def _parse_alias( 4177 self, this: t.Optional[exp.Expression], explicit: bool = False 4178 ) -> t.Optional[exp.Expression]: 4179 any_token = self._match(TokenType.ALIAS) 4180 4181 if explicit and not any_token: 4182 return this 4183 4184 if 
self._match(TokenType.L_PAREN): 4185 aliases = self.expression( 4186 exp.Aliases, 4187 this=this, 4188 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4189 ) 4190 self._match_r_paren(aliases) 4191 return aliases 4192 4193 alias = self._parse_id_var(any_token) 4194 4195 if alias: 4196 return self.expression(exp.Alias, this=this, alias=alias) 4197 4198 return this 4199 4200 def _parse_id_var( 4201 self, 4202 any_token: bool = True, 4203 tokens: t.Optional[t.Collection[TokenType]] = None, 4204 ) -> t.Optional[exp.Expression]: 4205 identifier = self._parse_identifier() 4206 4207 if identifier: 4208 return identifier 4209 4210 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4211 quoted = self._prev.token_type == TokenType.STRING 4212 return exp.Identifier(this=self._prev.text, quoted=quoted) 4213 4214 return None 4215 4216 def _parse_string(self) -> t.Optional[exp.Expression]: 4217 if self._match(TokenType.STRING): 4218 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4219 return self._parse_placeholder() 4220 4221 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4222 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4223 4224 def _parse_number(self) -> t.Optional[exp.Expression]: 4225 if self._match(TokenType.NUMBER): 4226 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4227 return self._parse_placeholder() 4228 4229 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4230 if self._match(TokenType.IDENTIFIER): 4231 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4232 return self._parse_placeholder() 4233 4234 def _parse_var( 4235 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4236 ) -> t.Optional[exp.Expression]: 4237 if ( 4238 (any_token and self._advance_any()) 4239 or self._match(TokenType.VAR) 4240 or (self._match_set(tokens) if tokens else False) 
4241 ): 4242 return self.expression(exp.Var, this=self._prev.text) 4243 return self._parse_placeholder() 4244 4245 def _advance_any(self) -> t.Optional[Token]: 4246 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4247 self._advance() 4248 return self._prev 4249 return None 4250 4251 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4252 return self._parse_var() or self._parse_string() 4253 4254 def _parse_null(self) -> t.Optional[exp.Expression]: 4255 if self._match(TokenType.NULL): 4256 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4257 return None 4258 4259 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4260 if self._match(TokenType.TRUE): 4261 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4262 if self._match(TokenType.FALSE): 4263 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4264 return None 4265 4266 def _parse_star(self) -> t.Optional[exp.Expression]: 4267 if self._match(TokenType.STAR): 4268 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4269 return None 4270 4271 def _parse_parameter(self) -> exp.Parameter: 4272 wrapped = self._match(TokenType.L_BRACE) 4273 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4274 self._match(TokenType.R_BRACE) 4275 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4276 4277 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4278 if self._match_set(self.PLACEHOLDER_PARSERS): 4279 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4280 if placeholder: 4281 return placeholder 4282 self._advance(-1) 4283 return None 4284 4285 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4286 if not self._match(TokenType.EXCEPT): 4287 return None 4288 if self._match(TokenType.L_PAREN, advance=False): 4289 return self._parse_wrapped_csv(self._parse_column) 4290 return self._parse_csv(self._parse_column) 4291 4292 def _parse_replace(self) -> 
t.Optional[t.List[t.Optional[exp.Expression]]]: 4293 if not self._match(TokenType.REPLACE): 4294 return None 4295 if self._match(TokenType.L_PAREN, advance=False): 4296 return self._parse_wrapped_csv(self._parse_expression) 4297 return self._parse_expressions() 4298 4299 def _parse_csv( 4300 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4301 ) -> t.List[t.Optional[exp.Expression]]: 4302 parse_result = parse_method() 4303 items = [parse_result] if parse_result is not None else [] 4304 4305 while self._match(sep): 4306 self._add_comments(parse_result) 4307 parse_result = parse_method() 4308 if parse_result is not None: 4309 items.append(parse_result) 4310 4311 return items 4312 4313 def _parse_tokens( 4314 self, parse_method: t.Callable, expressions: t.Dict 4315 ) -> t.Optional[exp.Expression]: 4316 this = parse_method() 4317 4318 while self._match_set(expressions): 4319 this = self.expression( 4320 expressions[self._prev.token_type], 4321 this=this, 4322 comments=self._prev_comments, 4323 expression=parse_method(), 4324 ) 4325 4326 return this 4327 4328 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4329 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4330 4331 def _parse_wrapped_csv( 4332 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4333 ) -> t.List[t.Optional[exp.Expression]]: 4334 return self._parse_wrapped( 4335 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4336 ) 4337 4338 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4339 wrapped = self._match(TokenType.L_PAREN) 4340 if not wrapped and not optional: 4341 self.raise_error("Expecting (") 4342 parse_result = parse_method() 4343 if wrapped: 4344 self._match_r_paren() 4345 return parse_result 4346 4347 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4348 return self._parse_csv(self._parse_expression) 4349 
# NOTE(review): every function below takes `self` and relies on parser state
# (`_curr`, `_prev`, `_next`, `_index`), so they appear to be methods of the
# parser class whose header is outside this span. The original listing had lost
# its indentation; re-indent when splicing these back under the class.


def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """Parse a SELECT if one is present, otherwise an expression with set operations.

    Args:
        alias: when true the fallback uses `_parse_expression`, otherwise
            `_parse_conjunction`.
    """
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_conjunction()
    )


def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parse a nested SELECT (no subquery alias), then set operations and query modifiers."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )


def _parse_transaction(self) -> exp.Transaction:
    """Parse the remainder of a transaction-start statement.

    Accepts an optional kind (any text in `TRANSACTION_KIND`), an optional
    TRANSACTION / WORK keyword, and a comma-separated list of modes.
    """
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts({"TRANSACTION", "WORK"})

    modes = []
    while True:
        # A single mode may span several consecutive VAR tokens; they are
        # re-joined with single spaces.
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)


def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """Parse the remainder of a COMMIT or ROLLBACK statement.

    Which of the two is built depends on the type of the token consumed just
    before this call. Both `TO [SAVEPOINT] <name>` and `AND [NO] CHAIN` are
    consumed either way, but only `savepoint` is attached to a Rollback and only
    `chain` to a Commit.
    """
    chain = None
    savepoint = None
    # Must be read before any further token is consumed, since `_prev` moves.
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts({"TRANSACTION", "WORK"})

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)


def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parse `ADD [COLUMN] [<not-exists clause>] <column def> [FIRST | AFTER <column>]`.

    Returns:
        The parsed column definition, or None when the current token is not ADD
        or no column definition could be parsed.
    """
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_column_def(self._parse_field(any_token=True))

    if expression:
        expression.set("exists", exists_column)

        # NOTE(review): nested under `if expression:` because the branch calls
        # `expression.set`; confirm against the upstream layout.
        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression


def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parse a DROP clause, filling in kind "COLUMN" when the args have no "kind" key.

    Returns the falsy result of the match when the current token is not DROP.
    """
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop


# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Parse a comma-separated list of partitions into a DropPartition node."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )


def _parse_add_constraint(self) -> exp.AddConstraint:
    """Parse a constraint being added; the introducing token has already been consumed.

    The previous token's type selects the form: a CONSTRAINT token is followed
    by a name and optionally a CHECK clause; otherwise (or afterwards) a foreign
    key or primary key definition is parsed if present.
    """
    this = None
    kind = self._prev.token_type

    if kind == TokenType.CONSTRAINT:
        this = self._parse_id_var()

        # NOTE(review): CHECK handling is placed inside the CONSTRAINT branch;
        # the flattened listing does not force this nesting - confirm.
        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

    if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
        expression = self._parse_foreign_key()
    elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
        expression = self._parse_primary_key()
    else:
        expression = None

    return self.expression(exp.AddConstraint, this=this, expression=expression)


def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
    """Parse the ADD action of ALTER TABLE as constraints or, failing that, columns."""
    # One token back from the current position - presumably the ADD keyword the
    # caller consumed; `_parse_add_column` re-matches "ADD" itself, hence the retreat.
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS):
        return self._parse_csv(self._parse_add_constraint)

    self._retreat(index)
    return self._parse_csv(self._parse_add_column)


def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """Parse `ALTER [COLUMN] <name>` followed by one of the supported changes.

    Supported: `DROP DEFAULT`, `SET DEFAULT <expr>`, or
    `[SET DATA] [TYPE <type>] [COLLATE <term>] [USING <expr>]`.
    """
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

    self._match_text_seq("SET", "DATA")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._match_text_seq("TYPE") and self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )


def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
    """Parse the DROP action of ALTER TABLE as partitions or, failing that, columns."""
    # One token back, so `_parse_drop_column` can re-match DROP after the retreat.
    index = self._index - 1

    partition_exists = self._parse_exists()
    # Peek only: `_parse_drop_partition` goes on to parse the partition itself.
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)


def _parse_alter_table_rename(self) -> exp.RenameTable:
    """Parse `[TO] <table>` into a RenameTable node."""
    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))


def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """Parse an ALTER statement.

    Returns an AlterTable only when the target is a TABLE, the following
    token's upper-cased text has an entry in `ALTER_PARSERS`, and that parser
    consumes every remaining token. In all other cases the whole statement is
    returned as a Command.
    """
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))

        # Leftover tokens mean the action was not fully understood.
        if not self._curr:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
            )
    return self._parse_as_command(start)


def _parse_merge(self) -> exp.Merge:
    """Parse `[INTO] <target> [USING] <source> [ON] <condition>` plus WHEN clauses.

    Each WHEN clause becomes an `exp.When` whose `then` is an Insert, an Update,
    a Var holding the DELETE keyword text, or None when no action matched.
    """
    self._match(TokenType.INTO)
    target = self._parse_table()

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    whens = []
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # False for "BY TARGET"; otherwise whether "BY SOURCE" was present.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match(TokenType.VALUES) and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=whens,
    )


def _parse_show(self) -> t.Optional[exp.Expression]:
    """Dispatch to a registered SHOW parser, else wrap the next token's text in a Show node."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    self._advance()
    return self.expression(exp.Show, this=self._prev.text.upper())


def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """Parse `<left> (= | TO) <right>` into a SetItem.

    A GLOBAL / SESSION kind followed by TRANSACTION is delegated to
    `_parse_set_transaction`.

    Returns:
        The SetItem, or None (with the token position restored) when no `=` or
        `TO` follows the left-hand side.
    """
    index = self._index

    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()

    if not self._match_texts(("=", "TO")):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(exp.EQ, this=left, expression=right)

    return self.expression(exp.SetItem, this=this, kind=kind)


def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parse `[TRANSACTION] <characteristic>[, ...]` into a SetItem of kind "TRANSACTION"."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # `global` is a reserved word, so it cannot be written as a plain keyword argument.
        **{"global": global_},  # type: ignore
    )


def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parse one SET item via a registered SET parser, else as a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)


def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a comma-separated list of SET items.

    If any token is left unconsumed, the position is rewound and the statement
    is returned as a Command instead.
    """
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_


def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
    """Return a Var for the first option whose space-separated words match the upcoming tokens."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.var(option)
    return None


def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume every remaining token and return the statement as an opaque Command.

    The SQL text from `start` to the last token is split after `len(start.text)`
    characters: the head becomes `this`, the rest `expression`.
    """
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])


def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse `(<kind> [(<key> <value> ...)])` into a DictProperty named `this`."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        # Closes the inner settings list (optional match, no error if absent).
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )


def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse `(MIN <x> [MAX] <y>)` or `(<y>)` into a DictRange.

    When MIN is absent, the single value is taken as the maximum and the
    minimum is the number literal 0.
    """
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    # Locals are named `range_min` / `range_max` so the builtins are not shadowed.
    if has_min:
        range_min = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        range_max = self._parse_var() or self._parse_primary()
    else:
        range_max = self._parse_var() or self._parse_primary()
        range_min = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=range_min, max=range_max)


def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """Look up the parser registered for the (possibly multi-word) key at the cursor.

    Tokens are consumed one at a time and walked through `trie`. On a complete
    key the matching callable from `parsers` is returned, with the key's tokens
    left consumed. If the walk fails, or the tokens run out while the key is
    still only a prefix, the position is restored and None is returned.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    # Looping on `self._curr` (instead of `while True`) covers input that ends
    # on a key prefix: the previous form then read `.text` from a missing token.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            return parsers[" ".join(this)]

    self._retreat(index)
    return None


def _match(self, token_type, advance=True, expression=None):
    """Return True if the current token has type `token_type`, else None.

    On a match the token is consumed unless `advance` is false, and
    `_add_comments(expression)` is called.
    """
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None


def _match_set(self, types, advance=True):
    """Return True if the current token's type is in `types`, else None.

    The token is consumed on a match unless `advance` is false.
    """
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None


def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Return True if the current and next tokens have the two given types, else None.

    Both tokens are consumed on a match unless `advance` is false.
    """
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None


def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a left parenthesis or report "Expecting (" through `raise_error`."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")


def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consume a right parenthesis or report "Expecting )" through `raise_error`."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")


def _match_texts(self, texts, advance=True):
    """Return True if the current token's upper-cased text is in `texts`, else False.

    The token is consumed on a match unless `advance` is false.
    """
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False


def _match_text_seq(self, *texts, advance=True):
    """Return True if the upcoming tokens' upper-cased texts equal `texts` in order.

    On a mismatch the position is restored and False is returned. On a match
    the tokens stay consumed unless `advance` is false.
    """
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    if not advance:
        self._retreat(index)

    return True


# Typing-only overloads: a definite Expression in gives a definite Expression out.
@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...


@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...
def _replace_columns_with_dots(self, this):
    """Rewrite Column nodes in `this` as Dot expressions or bare identifiers.

    Only Dot and Column nodes are processed: their children are passed back
    through this function via `exp.replace_children`. A Column is then itself
    replaced by `Dot(table, name)` when it has a truthy "table" arg, or by its
    inner `this` otherwise. Any other value is returned unchanged.
    """
    if not isinstance(this, (exp.Dot, exp.Column)):
        return this

    exp.replace_children(this, self._replace_columns_with_dots)
    if isinstance(this, exp.Dot):
        return this

    qualifier = this.args.get("table")
    if qualifier:
        return self.expression(exp.Dot, this=qualifier, expression=this.this)
    return this.this


def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """Replace columns that refer to lambda parameters with dots / identifiers.

    A column is rewritten when the name of its first part is in
    `lambda_variables`.

    Returns:
        `node`, or the replacement itself when `node` was the rewritten column.
        A falsy `node` is returned as is.
    """
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name not in lambda_variables:
            continue

        replacement = column.to_dot() if column.table else column.this
        ancestor = column.parent

        if not isinstance(ancestor, exp.Dot):
            # Not nested in a Dot chain: swap the column itself. When the
            # column is the root there is no parent to swap it in.
            if column is node:
                node = replacement
            else:
                column.replace(replacement)
            continue

        # Nested in a Dot chain: climb to the outermost Dot and replace that.
        while isinstance(ancestor.parent, exp.Dot):
            ancestor = ancestor.parent
        ancestor.replace(replacement)

    return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """
    Builds a map expression out of a flat list of alternating keys and values.

    Args:
        args: The arguments laid out as [key_0, value_0, key_1, value_1, ...]. A single
            argument whose `is_star` attribute is truthy is treated as a star map.

    Returns:
        A StarMap wrapping the lone star argument, or a VarMap whose `keys` and `values`
        are Arrays holding the even- and odd-positioned arguments, respectively.

    Raises:
        IndexError: If a key is not followed by a value, i.e. the number of arguments
            is odd and they are not a single star.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # Indexing (instead of slicing) keeps a trailing key without a value an error.
    pairs = [(args[pos], args[pos + 1]) for pos in range(0, len(args), 2)]

    return exp.VarMap(
        keys=exp.Array(expressions=[key for key, _ in pairs]),
        values=exp.Array(expressions=[value for _, value in pairs]),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.ENUM, 190 *NESTED_TYPE_TOKENS, 191 } 192 193 SUBQUERY_PREDICATES = { 194 TokenType.ANY: exp.Any, 195 TokenType.ALL: exp.All, 196 TokenType.EXISTS: exp.Exists, 197 TokenType.SOME: exp.Any, 198 } 199 200 RESERVED_KEYWORDS = { 201 *Tokenizer.SINGLE_TOKENS.values(), 202 TokenType.SELECT, 203 } 204 205 DB_CREATABLES = { 206 TokenType.DATABASE, 207 TokenType.SCHEMA, 208 TokenType.TABLE, 209 TokenType.VIEW, 210 TokenType.DICTIONARY, 211 } 212 213 CREATABLES = { 214 TokenType.COLUMN, 215 
TokenType.FUNCTION, 216 TokenType.INDEX, 217 TokenType.PROCEDURE, 218 *DB_CREATABLES, 219 } 220 221 # Tokens that can represent identifiers 222 ID_VAR_TOKENS = { 223 TokenType.VAR, 224 TokenType.ANTI, 225 TokenType.APPLY, 226 TokenType.ASC, 227 TokenType.AUTO_INCREMENT, 228 TokenType.BEGIN, 229 TokenType.CACHE, 230 TokenType.CASE, 231 TokenType.COLLATE, 232 TokenType.COMMAND, 233 TokenType.COMMENT, 234 TokenType.COMMIT, 235 TokenType.CONSTRAINT, 236 TokenType.DEFAULT, 237 TokenType.DELETE, 238 TokenType.DESC, 239 TokenType.DESCRIBE, 240 TokenType.DICTIONARY, 241 TokenType.DIV, 242 TokenType.END, 243 TokenType.EXECUTE, 244 TokenType.ESCAPE, 245 TokenType.FALSE, 246 TokenType.FIRST, 247 TokenType.FILTER, 248 TokenType.FORMAT, 249 TokenType.FULL, 250 TokenType.IF, 251 TokenType.IS, 252 TokenType.ISNULL, 253 TokenType.INTERVAL, 254 TokenType.KEEP, 255 TokenType.LEFT, 256 TokenType.LOAD, 257 TokenType.MERGE, 258 TokenType.NATURAL, 259 TokenType.NEXT, 260 TokenType.OFFSET, 261 TokenType.ORDINALITY, 262 TokenType.OVERWRITE, 263 TokenType.PARTITION, 264 TokenType.PERCENT, 265 TokenType.PIVOT, 266 TokenType.PRAGMA, 267 TokenType.RANGE, 268 TokenType.REFERENCES, 269 TokenType.RIGHT, 270 TokenType.ROW, 271 TokenType.ROWS, 272 TokenType.SEMI, 273 TokenType.SET, 274 TokenType.SETTINGS, 275 TokenType.SHOW, 276 TokenType.TEMPORARY, 277 TokenType.TOP, 278 TokenType.TRUE, 279 TokenType.UNIQUE, 280 TokenType.UNPIVOT, 281 TokenType.UPDATE, 282 TokenType.VOLATILE, 283 TokenType.WINDOW, 284 *CREATABLES, 285 *SUBQUERY_PREDICATES, 286 *TYPE_TOKENS, 287 *NO_PAREN_FUNCTIONS, 288 } 289 290 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 291 292 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 293 TokenType.APPLY, 294 TokenType.ASOF, 295 TokenType.FULL, 296 TokenType.LEFT, 297 TokenType.LOCK, 298 TokenType.NATURAL, 299 TokenType.OFFSET, 300 TokenType.RIGHT, 301 TokenType.WINDOW, 302 } 303 304 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 305 306 UPDATE_ALIAS_TOKENS = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 307 308 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 309 310 FUNC_TOKENS = { 311 TokenType.COMMAND, 312 TokenType.CURRENT_DATE, 313 TokenType.CURRENT_DATETIME, 314 TokenType.CURRENT_TIMESTAMP, 315 TokenType.CURRENT_TIME, 316 TokenType.CURRENT_USER, 317 TokenType.FILTER, 318 TokenType.FIRST, 319 TokenType.FORMAT, 320 TokenType.GLOB, 321 TokenType.IDENTIFIER, 322 TokenType.INDEX, 323 TokenType.ISNULL, 324 TokenType.ILIKE, 325 TokenType.LIKE, 326 TokenType.MERGE, 327 TokenType.OFFSET, 328 TokenType.PRIMARY_KEY, 329 TokenType.RANGE, 330 TokenType.REPLACE, 331 TokenType.ROW, 332 TokenType.UNNEST, 333 TokenType.VAR, 334 TokenType.LEFT, 335 TokenType.RIGHT, 336 TokenType.DATE, 337 TokenType.DATETIME, 338 TokenType.TABLE, 339 TokenType.TIMESTAMP, 340 TokenType.TIMESTAMPTZ, 341 TokenType.WINDOW, 342 *TYPE_TOKENS, 343 *SUBQUERY_PREDICATES, 344 } 345 346 CONJUNCTION = { 347 TokenType.AND: exp.And, 348 TokenType.OR: exp.Or, 349 } 350 351 EQUALITY = { 352 TokenType.EQ: exp.EQ, 353 TokenType.NEQ: exp.NEQ, 354 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 355 } 356 357 COMPARISON = { 358 TokenType.GT: exp.GT, 359 TokenType.GTE: exp.GTE, 360 TokenType.LT: exp.LT, 361 TokenType.LTE: exp.LTE, 362 } 363 364 BITWISE = { 365 TokenType.AMP: exp.BitwiseAnd, 366 TokenType.CARET: exp.BitwiseXor, 367 TokenType.PIPE: exp.BitwiseOr, 368 TokenType.DPIPE: exp.DPipe, 369 } 370 371 TERM = { 372 TokenType.DASH: exp.Sub, 373 TokenType.PLUS: exp.Add, 374 TokenType.MOD: exp.Mod, 375 TokenType.COLLATE: exp.Collate, 376 } 377 378 FACTOR = { 379 TokenType.DIV: exp.IntDiv, 380 TokenType.LR_ARROW: exp.Distance, 381 TokenType.SLASH: exp.Div, 382 TokenType.STAR: exp.Mul, 383 } 384 385 TIMESTAMPS = { 386 TokenType.TIME, 387 TokenType.TIMESTAMP, 388 TokenType.TIMESTAMPTZ, 389 TokenType.TIMESTAMPLTZ, 390 } 391 392 SET_OPERATIONS = { 393 TokenType.UNION, 394 TokenType.INTERSECT, 395 TokenType.EXCEPT, 396 } 397 398 JOIN_METHODS = { 399 TokenType.NATURAL, 400 TokenType.ASOF, 401 } 
402 403 JOIN_SIDES = { 404 TokenType.LEFT, 405 TokenType.RIGHT, 406 TokenType.FULL, 407 } 408 409 JOIN_KINDS = { 410 TokenType.INNER, 411 TokenType.OUTER, 412 TokenType.CROSS, 413 TokenType.SEMI, 414 TokenType.ANTI, 415 } 416 417 JOIN_HINTS: t.Set[str] = set() 418 419 LAMBDAS = { 420 TokenType.ARROW: lambda self, expressions: self.expression( 421 exp.Lambda, 422 this=self._replace_lambda( 423 self._parse_conjunction(), 424 {node.name for node in expressions}, 425 ), 426 expressions=expressions, 427 ), 428 TokenType.FARROW: lambda self, expressions: self.expression( 429 exp.Kwarg, 430 this=exp.var(expressions[0].name), 431 expression=self._parse_conjunction(), 432 ), 433 } 434 435 COLUMN_OPERATORS = { 436 TokenType.DOT: None, 437 TokenType.DCOLON: lambda self, this, to: self.expression( 438 exp.Cast if self.STRICT_CAST else exp.TryCast, 439 this=this, 440 to=to, 441 ), 442 TokenType.ARROW: lambda self, this, path: self.expression( 443 exp.JSONExtract, 444 this=this, 445 expression=path, 446 ), 447 TokenType.DARROW: lambda self, this, path: self.expression( 448 exp.JSONExtractScalar, 449 this=this, 450 expression=path, 451 ), 452 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 453 exp.JSONBExtract, 454 this=this, 455 expression=path, 456 ), 457 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 458 exp.JSONBExtractScalar, 459 this=this, 460 expression=path, 461 ), 462 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 463 exp.JSONBContains, 464 this=this, 465 expression=key, 466 ), 467 } 468 469 EXPRESSION_PARSERS = { 470 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 471 exp.Column: lambda self: self._parse_column(), 472 exp.Condition: lambda self: self._parse_conjunction(), 473 exp.DataType: lambda self: self._parse_types(), 474 exp.Expression: lambda self: self._parse_statement(), 475 exp.From: lambda self: self._parse_from(), 476 exp.Group: lambda self: self._parse_group(), 477 exp.Having: 
lambda self: self._parse_having(), 478 exp.Identifier: lambda self: self._parse_id_var(), 479 exp.Join: lambda self: self._parse_join(), 480 exp.Lambda: lambda self: self._parse_lambda(), 481 exp.Lateral: lambda self: self._parse_lateral(), 482 exp.Limit: lambda self: self._parse_limit(), 483 exp.Offset: lambda self: self._parse_offset(), 484 exp.Order: lambda self: self._parse_order(), 485 exp.Ordered: lambda self: self._parse_ordered(), 486 exp.Properties: lambda self: self._parse_properties(), 487 exp.Qualify: lambda self: self._parse_qualify(), 488 exp.Returning: lambda self: self._parse_returning(), 489 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 490 exp.Table: lambda self: self._parse_table_parts(), 491 exp.TableAlias: lambda self: self._parse_table_alias(), 492 exp.Where: lambda self: self._parse_where(), 493 exp.Window: lambda self: self._parse_named_window(), 494 exp.With: lambda self: self._parse_with(), 495 "JOIN_TYPE": lambda self: self._parse_join_parts(), 496 } 497 498 STATEMENT_PARSERS = { 499 TokenType.ALTER: lambda self: self._parse_alter(), 500 TokenType.BEGIN: lambda self: self._parse_transaction(), 501 TokenType.CACHE: lambda self: self._parse_cache(), 502 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 503 TokenType.COMMENT: lambda self: self._parse_comment(), 504 TokenType.CREATE: lambda self: self._parse_create(), 505 TokenType.DELETE: lambda self: self._parse_delete(), 506 TokenType.DESC: lambda self: self._parse_describe(), 507 TokenType.DESCRIBE: lambda self: self._parse_describe(), 508 TokenType.DROP: lambda self: self._parse_drop(), 509 TokenType.END: lambda self: self._parse_commit_or_rollback(), 510 TokenType.FROM: lambda self: exp.select("*").from_( 511 t.cast(exp.From, self._parse_from(skip_from_token=True)) 512 ), 513 TokenType.INSERT: lambda self: self._parse_insert(), 514 TokenType.LOAD: lambda self: self._parse_load(), 515 TokenType.MERGE: lambda self: self._parse_merge(), 516 
TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 517 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 518 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 519 TokenType.SET: lambda self: self._parse_set(), 520 TokenType.UNCACHE: lambda self: self._parse_uncache(), 521 TokenType.UPDATE: lambda self: self._parse_update(), 522 TokenType.USE: lambda self: self.expression( 523 exp.Use, 524 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 525 and exp.var(self._prev.text), 526 this=self._parse_table(schema=False), 527 ), 528 } 529 530 UNARY_PARSERS = { 531 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 532 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 533 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 534 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 535 } 536 537 PRIMARY_PARSERS = { 538 TokenType.STRING: lambda self, token: self.expression( 539 exp.Literal, this=token.text, is_string=True 540 ), 541 TokenType.NUMBER: lambda self, token: self.expression( 542 exp.Literal, this=token.text, is_string=False 543 ), 544 TokenType.STAR: lambda self, _: self.expression( 545 exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()} 546 ), 547 TokenType.NULL: lambda self, _: self.expression(exp.Null), 548 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 549 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 550 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 551 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 552 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 553 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 554 
TokenType.NATIONAL_STRING: lambda self, token: self.expression( 555 exp.National, this=token.text 556 ), 557 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 558 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 559 } 560 561 PLACEHOLDER_PARSERS = { 562 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 563 TokenType.PARAMETER: lambda self: self._parse_parameter(), 564 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 565 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 566 else None, 567 } 568 569 RANGE_PARSERS = { 570 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 571 TokenType.GLOB: binary_range_parser(exp.Glob), 572 TokenType.ILIKE: binary_range_parser(exp.ILike), 573 TokenType.IN: lambda self, this: self._parse_in(this), 574 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 575 TokenType.IS: lambda self, this: self._parse_is(this), 576 TokenType.LIKE: binary_range_parser(exp.Like), 577 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 578 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 579 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 580 } 581 582 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 583 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 584 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 585 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 586 "CHARACTER SET": lambda self: self._parse_character_set(), 587 "CHECKSUM": lambda self: self._parse_checksum(), 588 "CLUSTER BY": lambda self: self._parse_cluster(), 589 "CLUSTERED": lambda self: self._parse_clustered_by(), 590 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 591 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 592 "COPY": lambda self: self._parse_copy_property(), 593 
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 594 "DEFINER": lambda self: self._parse_definer(), 595 "DETERMINISTIC": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "DISTKEY": lambda self: self._parse_distkey(), 599 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 600 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 601 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 602 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 603 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 604 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 605 "FREESPACE": lambda self: self._parse_freespace(), 606 "IMMUTABLE": lambda self: self.expression( 607 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 608 ), 609 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 610 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 611 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 612 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 613 "LIKE": lambda self: self._parse_create_like(), 614 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 615 "LOCK": lambda self: self._parse_locking(), 616 "LOCKING": lambda self: self._parse_locking(), 617 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 618 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 619 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 620 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 621 "NO": lambda self: self._parse_no_property(), 622 "ON": lambda self: self._parse_on_property(), 623 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 624 "PARTITION BY": lambda self: 
self._parse_partitioned_by(), 625 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 626 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 627 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 628 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 629 "RETURNS": lambda self: self._parse_returns(), 630 "ROW": lambda self: self._parse_row(), 631 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 632 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 633 "SETTINGS": lambda self: self.expression( 634 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 635 ), 636 "SORTKEY": lambda self: self._parse_sortkey(), 637 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 638 "STABLE": lambda self: self.expression( 639 exp.StabilityProperty, this=exp.Literal.string("STABLE") 640 ), 641 "STORED": lambda self: self._parse_stored(), 642 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 643 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 644 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 645 "TO": lambda self: self._parse_to_table(), 646 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 647 "TTL": lambda self: self._parse_ttl(), 648 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 649 "VOLATILE": lambda self: self._parse_volatile_property(), 650 "WITH": lambda self: self._parse_with_property(), 651 } 652 653 CONSTRAINT_PARSERS = { 654 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 655 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 656 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 657 "CHARACTER SET": lambda self: self.expression( 658 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 659 ), 660 "CHECK": lambda self: self.expression( 661 exp.CheckColumnConstraint, 
this=self._parse_wrapped(self._parse_conjunction) 662 ), 663 "COLLATE": lambda self: self.expression( 664 exp.CollateColumnConstraint, this=self._parse_var() 665 ), 666 "COMMENT": lambda self: self.expression( 667 exp.CommentColumnConstraint, this=self._parse_string() 668 ), 669 "COMPRESS": lambda self: self._parse_compress(), 670 "DEFAULT": lambda self: self.expression( 671 exp.DefaultColumnConstraint, this=self._parse_bitwise() 672 ), 673 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 674 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 675 "FORMAT": lambda self: self.expression( 676 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 677 ), 678 "GENERATED": lambda self: self._parse_generated_as_identity(), 679 "IDENTITY": lambda self: self._parse_auto_increment(), 680 "INLINE": lambda self: self._parse_inline(), 681 "LIKE": lambda self: self._parse_create_like(), 682 "NOT": lambda self: self._parse_not_constraint(), 683 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 684 "ON": lambda self: self._match(TokenType.UPDATE) 685 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 686 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 687 "PRIMARY KEY": lambda self: self._parse_primary_key(), 688 "REFERENCES": lambda self: self._parse_references(match=False), 689 "TITLE": lambda self: self.expression( 690 exp.TitleColumnConstraint, this=self._parse_var_or_string() 691 ), 692 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 693 "UNIQUE": lambda self: self._parse_unique(), 694 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 695 } 696 697 ALTER_PARSERS = { 698 "ADD": lambda self: self._parse_alter_table_add(), 699 "ALTER": lambda self: self._parse_alter_table_alter(), 700 "DELETE": lambda self: self.expression(exp.Delete, 
where=self._parse_where()), 701 "DROP": lambda self: self._parse_alter_table_drop(), 702 "RENAME": lambda self: self._parse_alter_table_rename(), 703 } 704 705 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 706 707 NO_PAREN_FUNCTION_PARSERS = { 708 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 709 TokenType.CASE: lambda self: self._parse_case(), 710 TokenType.IF: lambda self: self._parse_if(), 711 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 712 exp.NextValueFor, 713 this=self._parse_column(), 714 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 715 ), 716 } 717 718 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 719 720 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 721 "ANY_VALUE": lambda self: self._parse_any_value(), 722 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 723 "CONCAT": lambda self: self._parse_concat(), 724 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 725 "DECODE": lambda self: self._parse_decode(), 726 "EXTRACT": lambda self: self._parse_extract(), 727 "JSON_OBJECT": lambda self: self._parse_json_object(), 728 "LOG": lambda self: self._parse_logarithm(), 729 "MATCH": lambda self: self._parse_match_against(), 730 "OPENJSON": lambda self: self._parse_open_json(), 731 "POSITION": lambda self: self._parse_position(), 732 "SAFE_CAST": lambda self: self._parse_cast(False), 733 "STRING_AGG": lambda self: self._parse_string_agg(), 734 "SUBSTRING": lambda self: self._parse_substring(), 735 "TRIM": lambda self: self._parse_trim(), 736 "TRY_CAST": lambda self: self._parse_cast(False), 737 "TRY_CONVERT": lambda self: self._parse_convert(False), 738 } 739 740 QUERY_MODIFIER_PARSERS = { 741 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 742 TokenType.WHERE: lambda self: ("where", self._parse_where()), 743 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 744 TokenType.HAVING: 
lambda self: ("having", self._parse_having()), 745 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 746 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 747 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 748 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 749 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 750 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 751 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 752 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 753 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 754 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 755 TokenType.CLUSTER_BY: lambda self: ( 756 "cluster", 757 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 758 ), 759 TokenType.DISTRIBUTE_BY: lambda self: ( 760 "distribute", 761 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 762 ), 763 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 764 } 765 766 SET_PARSERS = { 767 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 768 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 769 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 770 "TRANSACTION": lambda self: self._parse_set_transaction(), 771 } 772 773 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 774 775 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 776 777 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 778 779 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 780 781 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 782 783 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 784 TRANSACTION_CHARACTERISTICS = { 785 "ISOLATION LEVEL REPEATABLE READ", 786 "ISOLATION LEVEL READ COMMITTED", 787 "ISOLATION LEVEL READ 
UNCOMMITTED", 788 "ISOLATION LEVEL SERIALIZABLE", 789 "READ WRITE", 790 "READ ONLY", 791 } 792 793 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 794 795 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 796 797 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 798 799 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 800 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 801 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 802 803 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 804 805 STRICT_CAST = True 806 807 # A NULL arg in CONCAT yields NULL by default 808 CONCAT_NULL_OUTPUTS_STRING = False 809 810 PREFIXED_PIVOT_COLUMNS = False 811 IDENTIFY_PIVOT_STRINGS = False 812 813 LOG_BASE_FIRST = True 814 LOG_DEFAULTS_TO_LN = False 815 816 __slots__ = ( 817 "error_level", 818 "error_message_context", 819 "max_errors", 820 "sql", 821 "errors", 822 "_tokens", 823 "_index", 824 "_curr", 825 "_next", 826 "_prev", 827 "_prev_comments", 828 ) 829 830 # Autofilled 831 INDEX_OFFSET: int = 0 832 UNNEST_COLUMN_ONLY: bool = False 833 ALIAS_POST_TABLESAMPLE: bool = False 834 STRICT_STRING_CONCAT = False 835 NULL_ORDERING: str = "nulls_are_small" 836 SHOW_TRIE: t.Dict = {} 837 SET_TRIE: t.Dict = {} 838 FORMAT_MAPPING: t.Dict[str, str] = {} 839 FORMAT_TRIE: t.Dict = {} 840 TIME_MAPPING: t.Dict[str, str] = {} 841 TIME_TRIE: t.Dict = {} 842 843 def __init__( 844 self, 845 error_level: t.Optional[ErrorLevel] = None, 846 error_message_context: int = 100, 847 max_errors: int = 3, 848 ): 849 self.error_level = error_level or ErrorLevel.IMMEDIATE 850 self.error_message_context = error_message_context 851 self.max_errors = max_errors 852 self.reset() 853 854 def reset(self): 855 self.sql = "" 856 self.errors = [] 857 self._tokens = [] 858 self._index = 0 859 self._curr = None 860 self._next = None 861 self._prev = None 862 self._prev_comments = None 863 864 def parse( 865 self, raw_tokens: 
t.List[Token], sql: t.Optional[str] = None 866 ) -> t.List[t.Optional[exp.Expression]]: 867 """ 868 Parses a list of tokens and returns a list of syntax trees, one tree 869 per parsed SQL statement. 870 871 Args: 872 raw_tokens: The list of tokens. 873 sql: The original SQL string, used to produce helpful debug messages. 874 875 Returns: 876 The list of the produced syntax trees. 877 """ 878 return self._parse( 879 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 880 ) 881 882 def parse_into( 883 self, 884 expression_types: exp.IntoType, 885 raw_tokens: t.List[Token], 886 sql: t.Optional[str] = None, 887 ) -> t.List[t.Optional[exp.Expression]]: 888 """ 889 Parses a list of tokens into a given Expression type. If a collection of Expression 890 types is given instead, this method will try to parse the token list into each one 891 of them, stopping at the first for which the parsing succeeds. 892 893 Args: 894 expression_types: The expression type(s) to try and parse the token list into. 895 raw_tokens: The list of tokens. 896 sql: The original SQL string, used to produce helpful debug messages. 897 898 Returns: 899 The target Expression. 
900 """ 901 errors = [] 902 for expression_type in ensure_list(expression_types): 903 parser = self.EXPRESSION_PARSERS.get(expression_type) 904 if not parser: 905 raise TypeError(f"No parser registered for {expression_type}") 906 907 try: 908 return self._parse(parser, raw_tokens, sql) 909 except ParseError as e: 910 e.errors[0]["into_expression"] = expression_type 911 errors.append(e) 912 913 raise ParseError( 914 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 915 errors=merge_errors(errors), 916 ) from errors[-1] 917 918 def _parse( 919 self, 920 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 921 raw_tokens: t.List[Token], 922 sql: t.Optional[str] = None, 923 ) -> t.List[t.Optional[exp.Expression]]: 924 self.reset() 925 self.sql = sql or "" 926 927 total = len(raw_tokens) 928 chunks: t.List[t.List[Token]] = [[]] 929 930 for i, token in enumerate(raw_tokens): 931 if token.token_type == TokenType.SEMICOLON: 932 if i < total - 1: 933 chunks.append([]) 934 else: 935 chunks[-1].append(token) 936 937 expressions = [] 938 939 for tokens in chunks: 940 self._index = -1 941 self._tokens = tokens 942 self._advance() 943 944 expressions.append(parse_method(self)) 945 946 if self._index < len(self._tokens): 947 self.raise_error("Invalid expression / Unexpected token") 948 949 self.check_errors() 950 951 return expressions 952 953 def check_errors(self) -> None: 954 """Logs or raises any found errors, depending on the chosen error level setting.""" 955 if self.error_level == ErrorLevel.WARN: 956 for error in self.errors: 957 logger.error(str(error)) 958 elif self.error_level == ErrorLevel.RAISE and self.errors: 959 raise ParseError( 960 concat_messages(self.errors, self.max_errors), 961 errors=merge_errors(self.errors), 962 ) 963 964 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 965 """ 966 Appends an error in the list of recorded errors or raises it, depending on the chosen 967 error level setting. 
968 """ 969 token = token or self._curr or self._prev or Token.string("") 970 start = token.start 971 end = token.end + 1 972 start_context = self.sql[max(start - self.error_message_context, 0) : start] 973 highlight = self.sql[start:end] 974 end_context = self.sql[end : end + self.error_message_context] 975 976 error = ParseError.new( 977 f"{message}. Line {token.line}, Col: {token.col}.\n" 978 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 979 description=message, 980 line=token.line, 981 col=token.col, 982 start_context=start_context, 983 highlight=highlight, 984 end_context=end_context, 985 ) 986 987 if self.error_level == ErrorLevel.IMMEDIATE: 988 raise error 989 990 self.errors.append(error) 991 992 def expression( 993 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 994 ) -> E: 995 """ 996 Creates a new, validated Expression. 997 998 Args: 999 exp_class: The expression class to instantiate. 1000 comments: An optional list of comments to attach to the expression. 1001 kwargs: The arguments to set for the expression along with their respective values. 1002 1003 Returns: 1004 The target expression. 1005 """ 1006 instance = exp_class(**kwargs) 1007 instance.add_comments(comments) if comments else self._add_comments(instance) 1008 return self.validate_expression(instance) 1009 1010 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1011 if expression and self._prev_comments: 1012 expression.add_comments(self._prev_comments) 1013 self._prev_comments = None 1014 1015 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1016 """ 1017 Validates an Expression, making sure that all its mandatory arguments are set. 1018 1019 Args: 1020 expression: The expression to validate. 1021 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1022 1023 Returns: 1024 The validated expression. 
def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
    """
    Parses a COMMENT statement into an `exp.Comment`.

    The accepted shape is: an optional IF EXISTS (only when `allow_exists` is set), an
    optional ON token, a token from `CREATABLES` naming the kind of object, the object
    itself, an optional IS token and finally the comment string.

    Args:
        allow_exists: Whether to try parsing an IF EXISTS clause first.

    Returns:
        The `exp.Comment` node, or the result of `_parse_as_command` when no token from
        `CREATABLES` follows.
    """
    start = self._prev

    exists = None
    if allow_exists:
        exists = self._parse_exists()

    self._match(TokenType.ON)

    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        return self._parse_as_command(start)

    # The kind of object decides which parser reads the commented target.
    target_type = kind.token_type
    if target_type == TokenType.TABLE:
        this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif target_type == TokenType.COLUMN:
        this = self._parse_column()
    elif target_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=target_type)
    else:
        this = self._parse_id_var()

    self._match(TokenType.IS)
    comment_text = self._parse_string()

    return self.expression(
        exp.Comment, this=this, kind=kind.text, expression=comment_text, exists=exists
    )
this=table) 1084 1085 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1086 def _parse_ttl(self) -> exp.Expression: 1087 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1088 this = self._parse_bitwise() 1089 1090 if self._match_text_seq("DELETE"): 1091 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1092 if self._match_text_seq("RECOMPRESS"): 1093 return self.expression( 1094 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1095 ) 1096 if self._match_text_seq("TO", "DISK"): 1097 return self.expression( 1098 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1099 ) 1100 if self._match_text_seq("TO", "VOLUME"): 1101 return self.expression( 1102 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1103 ) 1104 1105 return this 1106 1107 expressions = self._parse_csv(_parse_ttl_action) 1108 where = self._parse_where() 1109 group = self._parse_group() 1110 1111 aggregates = None 1112 if group and self._match(TokenType.SET): 1113 aggregates = self._parse_csv(self._parse_set_item) 1114 1115 return self.expression( 1116 exp.MergeTreeTTL, 1117 expressions=expressions, 1118 where=where, 1119 group=group, 1120 aggregates=aggregates, 1121 ) 1122 1123 def _parse_statement(self) -> t.Optional[exp.Expression]: 1124 if self._curr is None: 1125 return None 1126 1127 if self._match_set(self.STATEMENT_PARSERS): 1128 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1129 1130 if self._match_set(Tokenizer.COMMANDS): 1131 return self._parse_command() 1132 1133 expression = self._parse_expression() 1134 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1135 return self._parse_query_modifiers(expression) 1136 1137 def _parse_drop(self) -> exp.Drop | exp.Command: 1138 start = self._prev 1139 temporary = self._match(TokenType.TEMPORARY) 1140 materialized = self._match_text_seq("MATERIALIZED") 1141 1142 kind = 
def _parse_create(self) -> exp.Create | exp.Command:
    """
    Parses a CREATE statement, starting right after the token that selected this parser.

    Returns:
        An `exp.Create` node. When no token from `CREATABLES` directly follows and parsing
        leading properties does not lead to one either, the statement is handed to
        `_parse_as_command` instead and its result is returned.
    """
    # Note: this can't be None because we've matched a statement parser
    start = self._prev
    # Truthy when the statement itself started with REPLACE, or when an OR REPLACE pair follows
    replace = start.text.upper() == "REPLACE" or self._match_pair(
        TokenType.OR, TokenType.REPLACE
    )
    unique = self._match(TokenType.UNIQUE)

    # For a TABLE FUNCTION pair, step over TABLE so that FUNCTION is the next token examined
    if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
        self._advance()

    properties = None
    create_token = self._match_set(self.CREATABLES) and self._prev

    if not create_token:
        # exp.Properties.Location.POST_CREATE
        properties = self._parse_properties()
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

    # not_=True makes _parse_exists look for IF NOT EXISTS
    exists = self._parse_exists(not_=True)
    this = None
    expression = None
    indexes = None
    no_schema_binding = None
    begin = None
    clone = None

    def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
        # Merges newly parsed properties into the ones collected so far; the first
        # non-empty set simply becomes `properties`
        nonlocal properties
        if properties and temp_props:
            properties.expressions.extend(temp_props.expressions)
        elif temp_props:
            properties = temp_props

    if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=create_token.token_type)

        # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        begin = self._match(TokenType.BEGIN)
        return_ = self._match_text_seq("RETURN")
        expression = self._parse_statement()

        # A body introduced by RETURN is wrapped so that the keyword is not lost
        if return_:
            expression = self.expression(exp.Return, this=expression)
    elif create_token.token_type == TokenType.INDEX:
        this = self._parse_index(index=self._parse_id_var())
    elif create_token.token_type in self.DB_CREATABLES:
        table_parts = self._parse_table_parts(schema=True)

        # exp.Properties.Location.POST_NAME
        self._match(TokenType.COMMA)
        extend_props(self._parse_properties(before=True))

        this = self._parse_schema(this=table_parts)

        # exp.Properties.Location.POST_SCHEMA and POST_WITH
        extend_props(self._parse_properties())

        self._match(TokenType.ALIAS)
        if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
            # exp.Properties.Location.POST_ALIAS
            extend_props(self._parse_properties())

        expression = self._parse_ddl_select()

        if create_token.token_type == TokenType.TABLE:
            indexes = []
            # Keep reading indexes, each optionally followed by a comma, until none is found;
            # properties are collected after every attempt, including the failed last one
            while True:
                index = self._parse_index()

                # exp.Properties.Location.POST_EXPRESSION and POST_INDEX
                extend_props(self._parse_properties())

                if not index:
                    break
                else:
                    self._match(TokenType.COMMA)
                    indexes.append(index)
        elif create_token.token_type == TokenType.VIEW:
            if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                no_schema_binding = True

        if self._match_text_seq("CLONE"):
            # `clone` first holds the cloned table and is then replaced by the exp.Clone node
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            # The kind is only read when an opening parenthesis follows
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

    return self.expression(
        exp.Create,
        this=this,
        kind=create_token.text,
        replace=replace,
        unique=unique,
        expression=expression,
        exists=exists,
        properties=properties,
        indexes=indexes,
        no_schema_binding=no_schema_binding,
        begin=begin,
        clone=clone,
    )
return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1319 1320 assignment = self._match_pair( 1321 TokenType.VAR, TokenType.EQ, advance=False 1322 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1323 1324 if assignment: 1325 key = self._parse_var_or_string() 1326 self._match(TokenType.EQ) 1327 return self.expression(exp.Property, this=key, value=self._parse_column()) 1328 1329 return None 1330 1331 def _parse_stored(self) -> exp.FileFormatProperty: 1332 self._match(TokenType.ALIAS) 1333 1334 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1335 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1336 1337 return self.expression( 1338 exp.FileFormatProperty, 1339 this=self.expression( 1340 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1341 ) 1342 if input_format or output_format 1343 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1344 ) 1345 1346 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1347 self._match(TokenType.EQ) 1348 self._match(TokenType.ALIAS) 1349 return self.expression(exp_class, this=self._parse_field()) 1350 1351 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1352 properties = [] 1353 while True: 1354 if before: 1355 prop = self._parse_property_before() 1356 else: 1357 prop = self._parse_property() 1358 1359 if not prop: 1360 break 1361 for p in ensure_list(prop): 1362 properties.append(p) 1363 1364 if properties: 1365 return self.expression(exp.Properties, expressions=properties) 1366 1367 return None 1368 1369 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1370 return self.expression( 1371 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1372 ) 1373 1374 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1375 if 
def _parse_with_property(
    self,
) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
    """
    Parses a property introduced by WITH.

    After an optional WITH token, the following forms are tried in order: a parenthesized,
    comma-separated list of properties, JOURNAL, DATA, NO DATA and, as long as another
    token follows the current one, an isolated loading property.

    Returns:
        The parsed property (or list of properties), or None when nothing applies.
    """
    self._match(TokenType.WITH)

    # Only peek at the parenthesis; _parse_wrapped_csv consumes it itself.
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_property)

    if self._match_text_seq("JOURNAL"):
        return self._parse_withjournaltable()

    for keywords, no_data in ((("DATA",), False), (("NO", "DATA"), True)):
        if self._match_text_seq(*keywords):
            return self._parse_withdata(no=no_data)

    return self._parse_withisolatedloading() if self._next else None
1433 if self._match(TokenType.ON): 1434 on = True 1435 elif self._match_text_seq("OFF"): 1436 on = False 1437 1438 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1439 1440 def _parse_cluster(self) -> exp.Cluster: 1441 return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered)) 1442 1443 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1444 self._match_text_seq("BY") 1445 1446 self._match_l_paren() 1447 expressions = self._parse_csv(self._parse_column) 1448 self._match_r_paren() 1449 1450 if self._match_text_seq("SORTED", "BY"): 1451 self._match_l_paren() 1452 sorted_by = self._parse_csv(self._parse_ordered) 1453 self._match_r_paren() 1454 else: 1455 sorted_by = None 1456 1457 self._match(TokenType.INTO) 1458 buckets = self._parse_number() 1459 self._match_text_seq("BUCKETS") 1460 1461 return self.expression( 1462 exp.ClusteredByProperty, 1463 expressions=expressions, 1464 sorted_by=sorted_by, 1465 buckets=buckets, 1466 ) 1467 1468 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1469 if not self._match_text_seq("GRANTS"): 1470 self._retreat(self._index - 1) 1471 return None 1472 1473 return self.expression(exp.CopyGrantsProperty) 1474 1475 def _parse_freespace(self) -> exp.FreespaceProperty: 1476 self._match(TokenType.EQ) 1477 return self.expression( 1478 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 1479 ) 1480 1481 def _parse_mergeblockratio( 1482 self, no: bool = False, default: bool = False 1483 ) -> exp.MergeBlockRatioProperty: 1484 if self._match(TokenType.EQ): 1485 return self.expression( 1486 exp.MergeBlockRatioProperty, 1487 this=self._parse_number(), 1488 percent=self._match(TokenType.PERCENT), 1489 ) 1490 1491 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 1492 1493 def _parse_datablocksize( 1494 self, 1495 default: t.Optional[bool] = None, 1496 minimum: t.Optional[bool] = None, 1497 
def _parse_locking(self) -> exp.LockingProperty:
    """
    Parses a LOCKING property: the locked object, FOR or IN, the lock type and OVERRIDE.

    Each part is looked for once, in that order. `kind`, `this`, `for_or_in` and
    `lock_type` are None when the corresponding part is absent.
    """
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        # DATABASE is recognized by its text rather than by a token type.
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # A name is only parsed for these kinds, not for ROW or when no kind was given.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        # Both spellings are stored as EXCLUSIVE.
        lock_type = "EXCLUSIVE"
    else:
        # The generator stops at the first keyword that matches.
        lock_type = next(
            (
                keyword
                for keyword in ("SHARE", "READ", "WRITE", "CHECKSUM")
                if self._match_text_seq(keyword)
            ),
            None,
        )

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty,
        this=this,
        kind=kind,
        for_or_in=for_or_in,
        lock_type=lock_type,
        override=override,
    )
self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1632 return exp.OnCommitProperty(delete=True) 1633 return None 1634 1635 def _parse_distkey(self) -> exp.DistKeyProperty: 1636 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1637 1638 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 1639 table = self._parse_table(schema=True) 1640 1641 options = [] 1642 while self._match_texts(("INCLUDING", "EXCLUDING")): 1643 this = self._prev.text.upper() 1644 1645 id_var = self._parse_id_var() 1646 if not id_var: 1647 return None 1648 1649 options.append( 1650 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 1651 ) 1652 1653 return self.expression(exp.LikeProperty, this=table, expressions=options) 1654 1655 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 1656 return self.expression( 1657 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 1658 ) 1659 1660 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 1661 self._match(TokenType.EQ) 1662 return self.expression( 1663 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1664 ) 1665 1666 def _parse_returns(self) -> exp.ReturnsProperty: 1667 value: t.Optional[exp.Expression] 1668 is_table = self._match(TokenType.TABLE) 1669 1670 if is_table: 1671 if self._match(TokenType.LT): 1672 value = self.expression( 1673 exp.Schema, 1674 this="TABLE", 1675 expressions=self._parse_csv(self._parse_struct_types), 1676 ) 1677 if not self._match(TokenType.GT): 1678 self.raise_error("Expecting >") 1679 else: 1680 value = self._parse_schema(exp.var("TABLE")) 1681 else: 1682 value = self._parse_types() 1683 1684 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 1685 1686 def _parse_describe(self) -> exp.Describe: 1687 kind = self._match_set(self.CREATABLES) and self._prev.text 1688 this = self._parse_table() 1689 return 
def _parse_insert(self) -> exp.Insert:
    """
    Parses an INSERT statement into an `exp.Insert`.

    The target is either a DIRECTORY (with an optional row format) or a table, which may be
    preceded by OR plus one of `INSERT_ALTERNATIVES`, INTO and TABLE. The remaining
    clauses are read in a fixed order: IF EXISTS, partition, REPLACE WHERE, the source
    query, the conflict clause and RETURNING.
    """
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None

    this: t.Optional[exp.Expression]
    if self._match_text_seq("DIRECTORY"):
        path = self._parse_var_or_string()
        row_format = self._parse_row_format(match_row=True)
        this = self.expression(exp.Directory, this=path, local=local, row_format=row_format)
    else:
        if self._match(TokenType.OR):
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        self._match(TokenType.TABLE)
        this = self._parse_table(schema=True)

    # Each call below consumes tokens, so the order of these assignments matters.
    exists = self._parse_exists()
    partition = self._parse_partition()
    where = self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_conjunction()
    source = self._parse_ddl_select()
    conflict = self._parse_on_conflict()
    returning = self._parse_returning()

    return self.expression(
        exp.Insert,
        this=this,
        exists=exists,
        partition=partition,
        where=where,
        expression=source,
        conflict=conflict,
        returning=returning,
        overwrite=overwrite,
        alternative=alternative,
        ignore=ignore,
    )
def _parse_row_format(
    self, match_row: bool = False
) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
    """
    Parses a row format specification, either SERDE or (optionally) DELIMITED.

    Args:
        match_row: Whether the ROW FORMAT token pair has to be matched first.

    Returns:
        None when `match_row` is set and ROW FORMAT does not follow. Otherwise an
        `exp.RowFormatSerdeProperty` for SERDE, or an `exp.RowFormatDelimitedProperty`
        carrying whichever of the delimiter clauses were present.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde = self._parse_string()
        return self.expression(exp.RowFormatSerdeProperty, this=serde)

    self._match_text_seq("DELIMITED")

    # Each clause is tried exactly once, in this order, and is followed by a string.
    clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    kwargs = {}
    for arg_name, keywords in clauses:
        if self._match_text_seq(*keywords):
            kwargs[arg_name] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
def _parse_delete(self) -> exp.Delete:
    """
    Parses a DELETE statement into an `exp.Delete`.

    A list of tables may appear before FROM (MySQL's "Multiple-Table Syntax", see
    https://dev.mysql.com/doc/refman/8.0/en/delete.html). FROM, USING, WHERE, RETURNING
    and LIMIT are then read in that order.
    """
    tables = None
    # Peek only: FROM itself is consumed further down.
    if not self._match(TokenType.FROM, advance=False):
        tables = self._parse_csv(self._parse_table) or None

    # Each call below consumes tokens, so the order of these assignments matters.
    target = self._match(TokenType.FROM) and self._parse_table(joins=True)
    using = self._match(TokenType.USING) and self._parse_table(joins=True)
    where = self._parse_where()
    returning = self._parse_returning()
    limit = self._parse_limit()

    return self.expression(
        exp.Delete,
        tables=tables,
        this=target,
        using=using,
        where=where,
        returning=returning,
        limit=limit,
    )
def _parse_select(
    self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
) -> t.Optional[exp.Expression]:
    """
    Parses a query: a WITH-prefixed statement, a SELECT, a parenthesized query or table,
    or a VALUES clause.

    Args:
        nested: Whether a leading parenthesis may open a nested query.
        table: Whether a leading parenthesis may wrap a table; the wrapped content is then
            parsed with `_parse_table` rather than `_parse_select`.
        parse_subquery_alias: Whether an alias following a parenthesized query is parsed
            by `_parse_subquery`.

    Returns:
        The parsed expression, passed through `_parse_set_operations`. The parenthesized
        branch returns early with either the wrapped table or the subquery.
    """
    cte = self._parse_with()
    if cte:
        this = self._parse_statement()

        if not this:
            self.raise_error("Failed to parse any statement following CTE")
            # Only reached when raise_error records the error instead of raising it
            return cte

        # Attach the WITH clause only to expressions that declare a "with" argument
        if "with" in this.arg_types:
            this.set("with", cte)
        else:
            self.raise_error(f"{this.key} does not support CTE")
            this = cte
    elif self._match(TokenType.SELECT):
        # Capture the comments now, before further matching replaces them
        comments = self._prev_comments

        hint = self._parse_hint()
        all_ = self._match(TokenType.ALL)
        distinct = self._match(TokenType.DISTINCT)

        # STRUCT or VALUE, but only when it follows an ALIAS token
        kind = (
            self._match(TokenType.ALIAS)
            and self._match_texts(("STRUCT", "VALUE"))
            and self._prev.text
        )

        if distinct:
            distinct = self.expression(
                exp.Distinct,
                on=self._parse_value() if self._match(TokenType.ON) else None,
            )

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        # A limit that is written before the projections (parsed with top=True)
        limit = self._parse_limit(top=True)
        expressions = self._parse_expressions()

        this = self.expression(
            exp.Select,
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=expressions,
            limit=limit,
        )
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        from_ = self._parse_from()
        if from_:
            this.set("from", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        if self._match(TokenType.PIVOT):
            this = self._parse_simplified_pivot()
        elif self._match(TokenType.FROM):
            # A parenthesis that opens directly with FROM becomes SELECT * FROM ...
            this = exp.select("*").from_(
                t.cast(exp.From, self._parse_from(skip_from_token=True))
            )
        else:
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))

        self._match_r_paren()

        alias = None

        # Ensure "wrapped" tables are not parsed as Subqueries. The exception to this is when there's
        # an alias that can be applied to the parentheses, because that would shadow all wrapped table
        # names, and so we want to parse it as a Subquery to represent the inner scope appropriately.
        # Additionally, we want the node under the Subquery to be an actual query, so we will replace
        # the table reference with a star query that selects from it.
        if isinstance(this, exp.Table):
            alias = self._parse_table_alias()
            if not alias:
                this.set("wrapped", True)
                return this

            this.set("wrapped", None)
            # The joins are detached from the table and re-attached to the new star query
            joins = this.args.pop("joins", None)
            this = this.replace(exp.select("*").from_(this.copy(), copy=False))
            this.set("joins", joins)

        # An alias that was already consumed above must not be parsed a second time
        subquery = self._parse_subquery(this, parse_alias=parse_subquery_alias and not alias)
        if subquery and alias:
            subquery.set("alias", alias)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        return subquery
    elif self._match(TokenType.VALUES):
        this = self.expression(
            exp.Values,
            expressions=self._parse_csv(self._parse_value),
            alias=self._parse_table_alias(),
        )
    else:
        this = None

    return self._parse_set_operations(this)
t.Optional[exp.TableAlias]: 2042 any_token = self._match(TokenType.ALIAS) 2043 alias = ( 2044 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2045 or self._parse_string_as_identifier() 2046 ) 2047 2048 index = self._index 2049 if self._match(TokenType.L_PAREN): 2050 columns = self._parse_csv(self._parse_function_parameter) 2051 self._match_r_paren() if columns else self._retreat(index) 2052 else: 2053 columns = None 2054 2055 if not alias and not columns: 2056 return None 2057 2058 return self.expression(exp.TableAlias, this=alias, columns=columns) 2059 2060 def _parse_subquery( 2061 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2062 ) -> t.Optional[exp.Subquery]: 2063 if not this: 2064 return None 2065 2066 return self.expression( 2067 exp.Subquery, 2068 this=this, 2069 pivots=self._parse_pivots(), 2070 alias=self._parse_table_alias() if parse_alias else None, 2071 ) 2072 2073 def _parse_query_modifiers( 2074 self, this: t.Optional[exp.Expression] 2075 ) -> t.Optional[exp.Expression]: 2076 if isinstance(this, self.MODIFIABLES): 2077 for join in iter(self._parse_join, None): 2078 this.append("joins", join) 2079 for lateral in iter(self._parse_lateral, None): 2080 this.append("laterals", lateral) 2081 2082 while True: 2083 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2084 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2085 key, expression = parser(self) 2086 2087 if expression: 2088 this.set(key, expression) 2089 if key == "limit": 2090 offset = expression.args.pop("offset", None) 2091 if offset: 2092 this.set("offset", exp.Offset(expression=offset)) 2093 continue 2094 break 2095 return this 2096 2097 def _parse_hint(self) -> t.Optional[exp.Hint]: 2098 if self._match(TokenType.HINT): 2099 hints = [] 2100 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2101 hints.extend(hint) 2102 2103 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2104 
self.raise_error("Expected */ after HINT") 2105 2106 return self.expression(exp.Hint, expressions=hints) 2107 2108 return None 2109 2110 def _parse_into(self) -> t.Optional[exp.Into]: 2111 if not self._match(TokenType.INTO): 2112 return None 2113 2114 temp = self._match(TokenType.TEMPORARY) 2115 unlogged = self._match_text_seq("UNLOGGED") 2116 self._match(TokenType.TABLE) 2117 2118 return self.expression( 2119 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2120 ) 2121 2122 def _parse_from( 2123 self, joins: bool = False, skip_from_token: bool = False 2124 ) -> t.Optional[exp.From]: 2125 if not skip_from_token and not self._match(TokenType.FROM): 2126 return None 2127 2128 return self.expression( 2129 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2130 ) 2131 2132 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2133 if not self._match(TokenType.MATCH_RECOGNIZE): 2134 return None 2135 2136 self._match_l_paren() 2137 2138 partition = self._parse_partition_by() 2139 order = self._parse_order() 2140 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2141 2142 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2143 rows = exp.var("ONE ROW PER MATCH") 2144 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2145 text = "ALL ROWS PER MATCH" 2146 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2147 text += f" SHOW EMPTY MATCHES" 2148 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2149 text += f" OMIT EMPTY MATCHES" 2150 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2151 text += f" WITH UNMATCHED ROWS" 2152 rows = exp.var(text) 2153 else: 2154 rows = None 2155 2156 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2157 text = "AFTER MATCH SKIP" 2158 if self._match_text_seq("PAST", "LAST", "ROW"): 2159 text += f" PAST LAST ROW" 2160 elif self._match_text_seq("TO", "NEXT", "ROW"): 2161 text += f" TO NEXT ROW" 2162 elif 
self._match_text_seq("TO", "FIRST"): 2163 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2164 elif self._match_text_seq("TO", "LAST"): 2165 text += f" TO LAST {self._advance_any().text}" # type: ignore 2166 after = exp.var(text) 2167 else: 2168 after = None 2169 2170 if self._match_text_seq("PATTERN"): 2171 self._match_l_paren() 2172 2173 if not self._curr: 2174 self.raise_error("Expecting )", self._curr) 2175 2176 paren = 1 2177 start = self._curr 2178 2179 while self._curr and paren > 0: 2180 if self._curr.token_type == TokenType.L_PAREN: 2181 paren += 1 2182 if self._curr.token_type == TokenType.R_PAREN: 2183 paren -= 1 2184 2185 end = self._prev 2186 self._advance() 2187 2188 if paren > 0: 2189 self.raise_error("Expecting )", self._curr) 2190 2191 pattern = exp.var(self._find_sql(start, end)) 2192 else: 2193 pattern = None 2194 2195 define = ( 2196 self._parse_csv( 2197 lambda: self.expression( 2198 exp.Alias, 2199 alias=self._parse_id_var(any_token=True), 2200 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2201 ) 2202 ) 2203 if self._match_text_seq("DEFINE") 2204 else None 2205 ) 2206 2207 self._match_r_paren() 2208 2209 return self.expression( 2210 exp.MatchRecognize, 2211 partition_by=partition, 2212 order=order, 2213 measures=measures, 2214 rows=rows, 2215 after=after, 2216 pattern=pattern, 2217 define=define, 2218 alias=self._parse_table_alias(), 2219 ) 2220 2221 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2222 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2223 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2224 2225 if outer_apply or cross_apply: 2226 this = self._parse_select(table=True) 2227 view = None 2228 outer = not cross_apply 2229 elif self._match(TokenType.LATERAL): 2230 this = self._parse_select(table=True) 2231 view = self._match(TokenType.VIEW) 2232 outer = self._match(TokenType.OUTER) 2233 else: 2234 return None 2235 2236 if not this: 2237 this = 
self._parse_function() or self._parse_id_var(any_token=False) 2238 while self._match(TokenType.DOT): 2239 this = exp.Dot( 2240 this=this, 2241 expression=self._parse_function() or self._parse_id_var(any_token=False), 2242 ) 2243 2244 if view: 2245 table = self._parse_id_var(any_token=False) 2246 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2247 table_alias: t.Optional[exp.TableAlias] = self.expression( 2248 exp.TableAlias, this=table, columns=columns 2249 ) 2250 elif isinstance(this, exp.Subquery) and this.alias: 2251 # Ensures parity between the Subquery's and the Lateral's "alias" args 2252 table_alias = this.args["alias"].copy() 2253 else: 2254 table_alias = self._parse_table_alias() 2255 2256 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2257 2258 def _parse_join_parts( 2259 self, 2260 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2261 return ( 2262 self._match_set(self.JOIN_METHODS) and self._prev, 2263 self._match_set(self.JOIN_SIDES) and self._prev, 2264 self._match_set(self.JOIN_KINDS) and self._prev, 2265 ) 2266 2267 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Join]: 2268 if self._match(TokenType.COMMA): 2269 return self.expression(exp.Join, this=self._parse_table()) 2270 2271 index = self._index 2272 method, side, kind = self._parse_join_parts() 2273 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2274 join = self._match(TokenType.JOIN) 2275 2276 if not skip_join_token and not join: 2277 self._retreat(index) 2278 kind = None 2279 method = None 2280 side = None 2281 2282 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2283 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2284 2285 if not skip_join_token and not join and not outer_apply and not cross_apply: 2286 return None 2287 2288 if outer_apply: 2289 side = Token(TokenType.LEFT, "LEFT") 2290 2291 
kwargs: t.Dict[str, t.Any] = {"this": self._parse_table()} 2292 2293 if method: 2294 kwargs["method"] = method.text 2295 if side: 2296 kwargs["side"] = side.text 2297 if kind: 2298 kwargs["kind"] = kind.text 2299 if hint: 2300 kwargs["hint"] = hint 2301 2302 if self._match(TokenType.ON): 2303 kwargs["on"] = self._parse_conjunction() 2304 elif self._match(TokenType.USING): 2305 kwargs["using"] = self._parse_wrapped_id_vars() 2306 elif not (kind and kind.token_type == TokenType.CROSS): 2307 index = self._index 2308 joins = self._parse_joins() 2309 2310 if joins and self._match(TokenType.ON): 2311 kwargs["on"] = self._parse_conjunction() 2312 elif joins and self._match(TokenType.USING): 2313 kwargs["using"] = self._parse_wrapped_id_vars() 2314 else: 2315 joins = None 2316 self._retreat(index) 2317 2318 kwargs["this"].set("joins", joins) 2319 2320 return self.expression(exp.Join, **kwargs) 2321 2322 def _parse_index( 2323 self, 2324 index: t.Optional[exp.Expression] = None, 2325 ) -> t.Optional[exp.Index]: 2326 if index: 2327 unique = None 2328 primary = None 2329 amp = None 2330 2331 self._match(TokenType.ON) 2332 self._match(TokenType.TABLE) # hive 2333 table = self._parse_table_parts(schema=True) 2334 else: 2335 unique = self._match(TokenType.UNIQUE) 2336 primary = self._match_text_seq("PRIMARY") 2337 amp = self._match_text_seq("AMP") 2338 2339 if not self._match(TokenType.INDEX): 2340 return None 2341 2342 index = self._parse_id_var() 2343 table = None 2344 2345 using = self._parse_field() if self._match(TokenType.USING) else None 2346 2347 if self._match(TokenType.L_PAREN, advance=False): 2348 columns = self._parse_wrapped_csv(self._parse_ordered) 2349 else: 2350 columns = None 2351 2352 return self.expression( 2353 exp.Index, 2354 this=index, 2355 table=table, 2356 using=using, 2357 columns=columns, 2358 unique=unique, 2359 primary=primary, 2360 amp=amp, 2361 partition_by=self._parse_partition_by(), 2362 ) 2363 2364 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 2365 hints: t.List[exp.Expression] = [] 2366 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2367 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2368 hints.append( 2369 self.expression( 2370 exp.WithTableHint, 2371 expressions=self._parse_csv( 2372 lambda: self._parse_function() or self._parse_var(any_token=True) 2373 ), 2374 ) 2375 ) 2376 self._match_r_paren() 2377 else: 2378 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2379 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2380 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2381 2382 self._match_texts({"INDEX", "KEY"}) 2383 if self._match(TokenType.FOR): 2384 hint.set("target", self._advance_any() and self._prev.text.upper()) 2385 2386 hint.set("expressions", self._parse_wrapped_id_vars()) 2387 hints.append(hint) 2388 2389 return hints or None 2390 2391 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2392 return ( 2393 (not schema and self._parse_function(optional_parens=False)) 2394 or self._parse_id_var(any_token=False) 2395 or self._parse_string_as_identifier() 2396 or self._parse_placeholder() 2397 ) 2398 2399 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2400 catalog = None 2401 db = None 2402 table = self._parse_table_part(schema=schema) 2403 2404 while self._match(TokenType.DOT): 2405 if catalog: 2406 # This allows nesting the table in arbitrarily many dot expressions if needed 2407 table = self.expression( 2408 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2409 ) 2410 else: 2411 catalog = db 2412 db = table 2413 table = self._parse_table_part(schema=schema) 2414 2415 if not table: 2416 self.raise_error(f"Expected table name but got {self._curr}") 2417 2418 return self.expression( 2419 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2420 ) 2421 2422 def _parse_table( 2423 self, 2424 
schema: bool = False, 2425 joins: bool = False, 2426 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2427 ) -> t.Optional[exp.Expression]: 2428 lateral = self._parse_lateral() 2429 if lateral: 2430 return lateral 2431 2432 unnest = self._parse_unnest() 2433 if unnest: 2434 return unnest 2435 2436 values = self._parse_derived_table_values() 2437 if values: 2438 return values 2439 2440 subquery = self._parse_select(table=True) 2441 if subquery: 2442 if not subquery.args.get("pivots"): 2443 subquery.set("pivots", self._parse_pivots()) 2444 return subquery 2445 2446 this: exp.Expression = self._parse_table_parts(schema=schema) 2447 2448 if schema: 2449 return self._parse_schema(this=this) 2450 2451 if self.ALIAS_POST_TABLESAMPLE: 2452 table_sample = self._parse_table_sample() 2453 2454 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2455 if alias: 2456 this.set("alias", alias) 2457 2458 if not this.args.get("pivots"): 2459 this.set("pivots", self._parse_pivots()) 2460 2461 this.set("hints", self._parse_table_hints()) 2462 2463 if not self.ALIAS_POST_TABLESAMPLE: 2464 table_sample = self._parse_table_sample() 2465 2466 if table_sample: 2467 table_sample.set("this", this) 2468 this = table_sample 2469 2470 if joins: 2471 for join in iter(self._parse_join, None): 2472 this.append("joins", join) 2473 2474 return this 2475 2476 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2477 if not self._match(TokenType.UNNEST): 2478 return None 2479 2480 expressions = self._parse_wrapped_csv(self._parse_type) 2481 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2482 2483 alias = self._parse_table_alias() if with_alias else None 2484 2485 if alias and self.UNNEST_COLUMN_ONLY: 2486 if alias.args.get("columns"): 2487 self.raise_error("Unexpected extra column alias in unnest.") 2488 2489 alias.set("columns", [alias.this]) 2490 alias.set("this", None) 2491 2492 offset = None 2493 if 
self._match_pair(TokenType.WITH, TokenType.OFFSET): 2494 self._match(TokenType.ALIAS) 2495 offset = self._parse_id_var() or exp.to_identifier("offset") 2496 2497 return self.expression( 2498 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2499 ) 2500 2501 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2502 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2503 if not is_derived and not self._match(TokenType.VALUES): 2504 return None 2505 2506 expressions = self._parse_csv(self._parse_value) 2507 alias = self._parse_table_alias() 2508 2509 if is_derived: 2510 self._match_r_paren() 2511 2512 return self.expression( 2513 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2514 ) 2515 2516 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2517 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2518 as_modifier and self._match_text_seq("USING", "SAMPLE") 2519 ): 2520 return None 2521 2522 bucket_numerator = None 2523 bucket_denominator = None 2524 bucket_field = None 2525 percent = None 2526 rows = None 2527 size = None 2528 seed = None 2529 2530 kind = ( 2531 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2532 ) 2533 method = self._parse_var(tokens=(TokenType.ROW,)) 2534 2535 self._match(TokenType.L_PAREN) 2536 2537 num = self._parse_number() 2538 2539 if self._match_text_seq("BUCKET"): 2540 bucket_numerator = self._parse_number() 2541 self._match_text_seq("OUT", "OF") 2542 bucket_denominator = bucket_denominator = self._parse_number() 2543 self._match(TokenType.ON) 2544 bucket_field = self._parse_field() 2545 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2546 percent = num 2547 elif self._match(TokenType.ROWS): 2548 rows = num 2549 else: 2550 size = num 2551 2552 self._match(TokenType.R_PAREN) 2553 2554 if self._match(TokenType.L_PAREN): 2555 method = self._parse_var() 2556 seed 
= self._match(TokenType.COMMA) and self._parse_number() 2557 self._match_r_paren() 2558 elif self._match_texts(("SEED", "REPEATABLE")): 2559 seed = self._parse_wrapped(self._parse_number) 2560 2561 return self.expression( 2562 exp.TableSample, 2563 method=method, 2564 bucket_numerator=bucket_numerator, 2565 bucket_denominator=bucket_denominator, 2566 bucket_field=bucket_field, 2567 percent=percent, 2568 rows=rows, 2569 size=size, 2570 seed=seed, 2571 kind=kind, 2572 ) 2573 2574 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2575 return list(iter(self._parse_pivot, None)) or None 2576 2577 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2578 return list(iter(self._parse_join, None)) or None 2579 2580 # https://duckdb.org/docs/sql/statements/pivot 2581 def _parse_simplified_pivot(self) -> exp.Pivot: 2582 def _parse_on() -> t.Optional[exp.Expression]: 2583 this = self._parse_bitwise() 2584 return self._parse_in(this) if self._match(TokenType.IN) else this 2585 2586 this = self._parse_table() 2587 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2588 using = self._match(TokenType.USING) and self._parse_csv( 2589 lambda: self._parse_alias(self._parse_function()) 2590 ) 2591 group = self._parse_group() 2592 return self.expression( 2593 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2594 ) 2595 2596 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2597 index = self._index 2598 2599 if self._match(TokenType.PIVOT): 2600 unpivot = False 2601 elif self._match(TokenType.UNPIVOT): 2602 unpivot = True 2603 else: 2604 return None 2605 2606 expressions = [] 2607 field = None 2608 2609 if not self._match(TokenType.L_PAREN): 2610 self._retreat(index) 2611 return None 2612 2613 if unpivot: 2614 expressions = self._parse_csv(self._parse_column) 2615 else: 2616 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2617 2618 if not expressions: 2619 self.raise_error("Failed to parse PIVOT's 
aggregation list") 2620 2621 if not self._match(TokenType.FOR): 2622 self.raise_error("Expecting FOR") 2623 2624 value = self._parse_column() 2625 2626 if not self._match(TokenType.IN): 2627 self.raise_error("Expecting IN") 2628 2629 field = self._parse_in(value, alias=True) 2630 2631 self._match_r_paren() 2632 2633 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2634 2635 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2636 pivot.set("alias", self._parse_table_alias()) 2637 2638 if not unpivot: 2639 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2640 2641 columns: t.List[exp.Expression] = [] 2642 for fld in pivot.args["field"].expressions: 2643 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2644 for name in names: 2645 if self.PREFIXED_PIVOT_COLUMNS: 2646 name = f"{name}_{field_name}" if name else field_name 2647 else: 2648 name = f"{field_name}_{name}" if name else field_name 2649 2650 columns.append(exp.to_identifier(name)) 2651 2652 pivot.set("columns", columns) 2653 2654 return pivot 2655 2656 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2657 return [agg.alias for agg in aggregations] 2658 2659 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2660 if not skip_where_token and not self._match(TokenType.WHERE): 2661 return None 2662 2663 return self.expression( 2664 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2665 ) 2666 2667 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2668 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2669 return None 2670 2671 elements = defaultdict(list) 2672 2673 if self._match(TokenType.ALL): 2674 return self.expression(exp.Group, all=True) 2675 2676 while True: 2677 expressions = self._parse_csv(self._parse_conjunction) 2678 if expressions: 2679 
elements["expressions"].extend(expressions) 2680 2681 grouping_sets = self._parse_grouping_sets() 2682 if grouping_sets: 2683 elements["grouping_sets"].extend(grouping_sets) 2684 2685 rollup = None 2686 cube = None 2687 totals = None 2688 2689 with_ = self._match(TokenType.WITH) 2690 if self._match(TokenType.ROLLUP): 2691 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2692 elements["rollup"].extend(ensure_list(rollup)) 2693 2694 if self._match(TokenType.CUBE): 2695 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2696 elements["cube"].extend(ensure_list(cube)) 2697 2698 if self._match_text_seq("TOTALS"): 2699 totals = True 2700 elements["totals"] = True # type: ignore 2701 2702 if not (grouping_sets or rollup or cube or totals): 2703 break 2704 2705 return self.expression(exp.Group, **elements) # type: ignore 2706 2707 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2708 if not self._match(TokenType.GROUPING_SETS): 2709 return None 2710 2711 return self._parse_wrapped_csv(self._parse_grouping_set) 2712 2713 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2714 if self._match(TokenType.L_PAREN): 2715 grouping_set = self._parse_csv(self._parse_column) 2716 self._match_r_paren() 2717 return self.expression(exp.Tuple, expressions=grouping_set) 2718 2719 return self._parse_column() 2720 2721 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2722 if not skip_having_token and not self._match(TokenType.HAVING): 2723 return None 2724 return self.expression(exp.Having, this=self._parse_conjunction()) 2725 2726 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2727 if not self._match(TokenType.QUALIFY): 2728 return None 2729 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2730 2731 def _parse_order( 2732 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2733 ) -> t.Optional[exp.Expression]: 2734 if not skip_order_token 
and not self._match(TokenType.ORDER_BY): 2735 return this 2736 2737 return self.expression( 2738 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2739 ) 2740 2741 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2742 if not self._match(token): 2743 return None 2744 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2745 2746 def _parse_ordered(self) -> exp.Ordered: 2747 this = self._parse_conjunction() 2748 self._match(TokenType.ASC) 2749 2750 is_desc = self._match(TokenType.DESC) 2751 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2752 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2753 desc = is_desc or False 2754 asc = not desc 2755 nulls_first = is_nulls_first or False 2756 explicitly_null_ordered = is_nulls_first or is_nulls_last 2757 2758 if ( 2759 not explicitly_null_ordered 2760 and ( 2761 (asc and self.NULL_ORDERING == "nulls_are_small") 2762 or (desc and self.NULL_ORDERING != "nulls_are_small") 2763 ) 2764 and self.NULL_ORDERING != "nulls_are_last" 2765 ): 2766 nulls_first = True 2767 2768 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2769 2770 def _parse_limit( 2771 self, this: t.Optional[exp.Expression] = None, top: bool = False 2772 ) -> t.Optional[exp.Expression]: 2773 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2774 limit_paren = self._match(TokenType.L_PAREN) 2775 expression = self._parse_number() if top else self._parse_term() 2776 2777 if self._match(TokenType.COMMA): 2778 offset = expression 2779 expression = self._parse_term() 2780 else: 2781 offset = None 2782 2783 limit_exp = self.expression(exp.Limit, this=this, expression=expression, offset=offset) 2784 2785 if limit_paren: 2786 self._match_r_paren() 2787 2788 return limit_exp 2789 2790 if self._match(TokenType.FETCH): 2791 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2792 direction = self._prev.text if direction else 
"FIRST" 2793 2794 count = self._parse_number() 2795 percent = self._match(TokenType.PERCENT) 2796 2797 self._match_set((TokenType.ROW, TokenType.ROWS)) 2798 2799 only = self._match_text_seq("ONLY") 2800 with_ties = self._match_text_seq("WITH", "TIES") 2801 2802 if only and with_ties: 2803 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2804 2805 return self.expression( 2806 exp.Fetch, 2807 direction=direction, 2808 count=count, 2809 percent=percent, 2810 with_ties=with_ties, 2811 ) 2812 2813 return this 2814 2815 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2816 if not self._match(TokenType.OFFSET): 2817 return this 2818 2819 count = self._parse_number() 2820 self._match_set((TokenType.ROW, TokenType.ROWS)) 2821 return self.expression(exp.Offset, this=this, expression=count) 2822 2823 def _parse_locks(self) -> t.List[exp.Lock]: 2824 locks = [] 2825 while True: 2826 if self._match_text_seq("FOR", "UPDATE"): 2827 update = True 2828 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2829 "LOCK", "IN", "SHARE", "MODE" 2830 ): 2831 update = False 2832 else: 2833 break 2834 2835 expressions = None 2836 if self._match_text_seq("OF"): 2837 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2838 2839 wait: t.Optional[bool | exp.Expression] = None 2840 if self._match_text_seq("NOWAIT"): 2841 wait = True 2842 elif self._match_text_seq("WAIT"): 2843 wait = self._parse_primary() 2844 elif self._match_text_seq("SKIP", "LOCKED"): 2845 wait = False 2846 2847 locks.append( 2848 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2849 ) 2850 2851 return locks 2852 2853 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2854 if not self._match_set(self.SET_OPERATIONS): 2855 return this 2856 2857 token_type = self._prev.token_type 2858 2859 if token_type == TokenType.UNION: 2860 expression = exp.Union 
2861 elif token_type == TokenType.EXCEPT: 2862 expression = exp.Except 2863 else: 2864 expression = exp.Intersect 2865 2866 return self.expression( 2867 expression, 2868 this=this, 2869 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2870 expression=self._parse_set_operations(self._parse_select(nested=True)), 2871 ) 2872 2873 def _parse_expression(self) -> t.Optional[exp.Expression]: 2874 return self._parse_alias(self._parse_conjunction()) 2875 2876 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2877 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2878 2879 def _parse_equality(self) -> t.Optional[exp.Expression]: 2880 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2881 2882 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2883 return self._parse_tokens(self._parse_range, self.COMPARISON) 2884 2885 def _parse_range(self) -> t.Optional[exp.Expression]: 2886 this = self._parse_bitwise() 2887 negate = self._match(TokenType.NOT) 2888 2889 if self._match_set(self.RANGE_PARSERS): 2890 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2891 if not expression: 2892 return this 2893 2894 this = expression 2895 elif self._match(TokenType.ISNULL): 2896 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2897 2898 # Postgres supports ISNULL and NOTNULL for conditions. 
2899 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2900 if self._match(TokenType.NOTNULL): 2901 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2902 this = self.expression(exp.Not, this=this) 2903 2904 if negate: 2905 this = self.expression(exp.Not, this=this) 2906 2907 if self._match(TokenType.IS): 2908 this = self._parse_is(this) 2909 2910 return this 2911 2912 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2913 index = self._index - 1 2914 negate = self._match(TokenType.NOT) 2915 2916 if self._match_text_seq("DISTINCT", "FROM"): 2917 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2918 return self.expression(klass, this=this, expression=self._parse_expression()) 2919 2920 expression = self._parse_null() or self._parse_boolean() 2921 if not expression: 2922 self._retreat(index) 2923 return None 2924 2925 this = self.expression(exp.Is, this=this, expression=expression) 2926 return self.expression(exp.Not, this=this) if negate else this 2927 2928 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2929 unnest = self._parse_unnest(with_alias=False) 2930 if unnest: 2931 this = self.expression(exp.In, this=this, unnest=unnest) 2932 elif self._match(TokenType.L_PAREN): 2933 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2934 2935 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2936 this = self.expression(exp.In, this=this, query=expressions[0]) 2937 else: 2938 this = self.expression(exp.In, this=this, expressions=expressions) 2939 2940 self._match_r_paren(this) 2941 else: 2942 this = self.expression(exp.In, this=this, field=self._parse_field()) 2943 2944 return this 2945 2946 def _parse_between(self, this: exp.Expression) -> exp.Between: 2947 low = self._parse_bitwise() 2948 self._match(TokenType.AND) 2949 high = self._parse_bitwise() 2950 return self.expression(exp.Between, this=this, low=low, 
high=high) 2951 2952 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2953 if not self._match(TokenType.ESCAPE): 2954 return this 2955 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2956 2957 def _parse_interval(self) -> t.Optional[exp.Interval]: 2958 if not self._match(TokenType.INTERVAL): 2959 return None 2960 2961 if self._match(TokenType.STRING, advance=False): 2962 this = self._parse_primary() 2963 else: 2964 this = self._parse_term() 2965 2966 unit = self._parse_function() or self._parse_var() 2967 2968 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2969 # each INTERVAL expression into this canonical form so it's easy to transpile 2970 if this and this.is_number: 2971 this = exp.Literal.string(this.name) 2972 elif this and this.is_string: 2973 parts = this.name.split() 2974 2975 if len(parts) == 2: 2976 if unit: 2977 # this is not actually a unit, it's something else 2978 unit = None 2979 self._retreat(self._index - 1) 2980 else: 2981 this = exp.Literal.string(parts[0]) 2982 unit = self.expression(exp.Var, this=parts[1]) 2983 2984 return self.expression(exp.Interval, this=this, unit=unit) 2985 2986 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2987 this = self._parse_term() 2988 2989 while True: 2990 if self._match_set(self.BITWISE): 2991 this = self.expression( 2992 self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term() 2993 ) 2994 elif self._match_pair(TokenType.LT, TokenType.LT): 2995 this = self.expression( 2996 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2997 ) 2998 elif self._match_pair(TokenType.GT, TokenType.GT): 2999 this = self.expression( 3000 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3001 ) 3002 else: 3003 break 3004 3005 return this 3006 3007 def _parse_term(self) -> t.Optional[exp.Expression]: 3008 return self._parse_tokens(self._parse_factor, self.TERM) 3009 3010 def 
_parse_factor(self) -> t.Optional[exp.Expression]: 3011 return self._parse_tokens(self._parse_unary, self.FACTOR) 3012 3013 def _parse_unary(self) -> t.Optional[exp.Expression]: 3014 if self._match_set(self.UNARY_PARSERS): 3015 return self.UNARY_PARSERS[self._prev.token_type](self) 3016 return self._parse_at_time_zone(self._parse_type()) 3017 3018 def _parse_type(self) -> t.Optional[exp.Expression]: 3019 interval = self._parse_interval() 3020 if interval: 3021 return interval 3022 3023 index = self._index 3024 data_type = self._parse_types(check_func=True) 3025 this = self._parse_column() 3026 3027 if data_type: 3028 if isinstance(this, exp.Literal): 3029 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3030 if parser: 3031 return parser(self, this, data_type) 3032 return self.expression(exp.Cast, this=this, to=data_type) 3033 if not data_type.expressions: 3034 self._retreat(index) 3035 return self._parse_column() 3036 return self._parse_column_ops(data_type) 3037 3038 return this 3039 3040 def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]: 3041 this = self._parse_type() 3042 if not this: 3043 return None 3044 3045 return self.expression( 3046 exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True) 3047 ) 3048 3049 def _parse_types( 3050 self, check_func: bool = False, schema: bool = False 3051 ) -> t.Optional[exp.Expression]: 3052 index = self._index 3053 3054 prefix = self._match_text_seq("SYSUDTLIB", ".") 3055 3056 if not self._match_set(self.TYPE_TOKENS): 3057 return None 3058 3059 type_token = self._prev.token_type 3060 3061 if type_token == TokenType.PSEUDO_TYPE: 3062 return self.expression(exp.PseudoType, this=self._prev.text) 3063 3064 nested = type_token in self.NESTED_TYPE_TOKENS 3065 is_struct = type_token == TokenType.STRUCT 3066 expressions = None 3067 maybe_func = False 3068 3069 if self._match(TokenType.L_PAREN): 3070 if is_struct: 3071 expressions = self._parse_csv(self._parse_struct_types) 3072 elif nested: 3073 
expressions = self._parse_csv( 3074 lambda: self._parse_types(check_func=check_func, schema=schema) 3075 ) 3076 elif type_token in self.ENUM_TYPE_TOKENS: 3077 expressions = self._parse_csv(self._parse_primary) 3078 else: 3079 expressions = self._parse_csv(self._parse_type_size) 3080 3081 if not expressions or not self._match(TokenType.R_PAREN): 3082 self._retreat(index) 3083 return None 3084 3085 maybe_func = True 3086 3087 if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3088 this = exp.DataType( 3089 this=exp.DataType.Type.ARRAY, 3090 expressions=[exp.DataType.build(type_token.value, expressions=expressions)], 3091 nested=True, 3092 ) 3093 3094 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 3095 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 3096 3097 return this 3098 3099 if self._match(TokenType.L_BRACKET): 3100 self._retreat(index) 3101 return None 3102 3103 values: t.Optional[t.List[t.Optional[exp.Expression]]] = None 3104 if nested and self._match(TokenType.LT): 3105 if is_struct: 3106 expressions = self._parse_csv(self._parse_struct_types) 3107 else: 3108 expressions = self._parse_csv( 3109 lambda: self._parse_types(check_func=check_func, schema=schema) 3110 ) 3111 3112 if not self._match(TokenType.GT): 3113 self.raise_error("Expecting >") 3114 3115 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 3116 values = self._parse_csv(self._parse_conjunction) 3117 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 3118 3119 value: t.Optional[exp.Expression] = None 3120 if type_token in self.TIMESTAMPS: 3121 if self._match_text_seq("WITH", "TIME", "ZONE"): 3122 maybe_func = False 3123 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions) 3124 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 3125 maybe_func = False 3126 value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3127 elif 
self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3128 maybe_func = False 3129 elif type_token == TokenType.INTERVAL: 3130 unit = self._parse_var() 3131 3132 if not unit: 3133 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3134 else: 3135 value = self.expression(exp.Interval, unit=unit) 3136 3137 if maybe_func and check_func: 3138 index2 = self._index 3139 peek = self._parse_string() 3140 3141 if not peek: 3142 self._retreat(index) 3143 return None 3144 3145 self._retreat(index2) 3146 3147 if value: 3148 return value 3149 3150 return exp.DataType( 3151 this=exp.DataType.Type[type_token.value.upper()], 3152 expressions=expressions, 3153 nested=nested, 3154 values=values, 3155 prefix=prefix, 3156 ) 3157 3158 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3159 this = self._parse_type() or self._parse_id_var() 3160 self._match(TokenType.COLON) 3161 return self._parse_column_def(this) 3162 3163 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3164 if not self._match_text_seq("AT", "TIME", "ZONE"): 3165 return this 3166 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3167 3168 def _parse_column(self) -> t.Optional[exp.Expression]: 3169 this = self._parse_field() 3170 if isinstance(this, exp.Identifier): 3171 this = self.expression(exp.Column, this=this) 3172 elif not this: 3173 return self._parse_bracket(this) 3174 return self._parse_column_ops(this) 3175 3176 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3177 this = self._parse_bracket(this) 3178 3179 while self._match_set(self.COLUMN_OPERATORS): 3180 op_token = self._prev.token_type 3181 op = self.COLUMN_OPERATORS.get(op_token) 3182 3183 if op_token == TokenType.DCOLON: 3184 field = self._parse_types() 3185 if not field: 3186 self.raise_error("Expected type") 3187 elif op and self._curr: 3188 self._advance() 3189 value = self._prev.text 3190 field = ( 3191 
exp.Literal.number(value) 3192 if self._prev.token_type == TokenType.NUMBER 3193 else exp.Literal.string(value) 3194 ) 3195 else: 3196 field = self._parse_field(anonymous_func=True, any_token=True) 3197 3198 if isinstance(field, exp.Func): 3199 # bigquery allows function calls like x.y.count(...) 3200 # SAFE.SUBSTR(...) 3201 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3202 this = self._replace_columns_with_dots(this) 3203 3204 if op: 3205 this = op(self, this, field) 3206 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3207 this = self.expression( 3208 exp.Column, 3209 this=field, 3210 table=this.this, 3211 db=this.args.get("table"), 3212 catalog=this.args.get("db"), 3213 ) 3214 else: 3215 this = self.expression(exp.Dot, this=this, expression=field) 3216 this = self._parse_bracket(this) 3217 return this 3218 3219 def _parse_primary(self) -> t.Optional[exp.Expression]: 3220 if self._match_set(self.PRIMARY_PARSERS): 3221 token_type = self._prev.token_type 3222 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3223 3224 if token_type == TokenType.STRING: 3225 expressions = [primary] 3226 while self._match(TokenType.STRING): 3227 expressions.append(exp.Literal.string(self._prev.text)) 3228 3229 if len(expressions) > 1: 3230 return self.expression(exp.Concat, expressions=expressions) 3231 3232 return primary 3233 3234 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3235 return exp.Literal.number(f"0.{self._prev.text}") 3236 3237 if self._match(TokenType.L_PAREN): 3238 comments = self._prev_comments 3239 query = self._parse_select() 3240 3241 if query: 3242 expressions = [query] 3243 else: 3244 expressions = self._parse_expressions() 3245 3246 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3247 3248 if isinstance(this, exp.Subqueryable): 3249 this = self._parse_set_operations( 3250 self._parse_subquery(this=this, parse_alias=False) 3251 ) 3252 elif 
len(expressions) > 1: 3253 this = self.expression(exp.Tuple, expressions=expressions) 3254 else: 3255 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3256 3257 if this: 3258 this.add_comments(comments) 3259 3260 self._match_r_paren(expression=this) 3261 return this 3262 3263 return None 3264 3265 def _parse_field( 3266 self, 3267 any_token: bool = False, 3268 tokens: t.Optional[t.Collection[TokenType]] = None, 3269 anonymous_func: bool = False, 3270 ) -> t.Optional[exp.Expression]: 3271 return ( 3272 self._parse_primary() 3273 or self._parse_function(anonymous=anonymous_func) 3274 or self._parse_id_var(any_token=any_token, tokens=tokens) 3275 ) 3276 3277 def _parse_function( 3278 self, 3279 functions: t.Optional[t.Dict[str, t.Callable]] = None, 3280 anonymous: bool = False, 3281 optional_parens: bool = True, 3282 ) -> t.Optional[exp.Expression]: 3283 if not self._curr: 3284 return None 3285 3286 token_type = self._curr.token_type 3287 3288 if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3289 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3290 3291 if not self._next or self._next.token_type != TokenType.L_PAREN: 3292 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 3293 self._advance() 3294 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3295 3296 return None 3297 3298 if token_type not in self.FUNC_TOKENS: 3299 return None 3300 3301 this = self._curr.text 3302 upper = this.upper() 3303 self._advance(2) 3304 3305 parser = self.FUNCTION_PARSERS.get(upper) 3306 3307 if parser and not anonymous: 3308 this = parser(self) 3309 else: 3310 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3311 3312 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3313 this = self.expression(subquery_predicate, this=self._parse_select()) 3314 self._match_r_paren() 3315 return this 3316 3317 if functions is None: 3318 functions = self.FUNCTIONS 3319 3320 
function = functions.get(upper) 3321 3322 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 3323 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 3324 3325 if function and not anonymous: 3326 this = self.validate_expression(function(args), args) 3327 else: 3328 this = self.expression(exp.Anonymous, this=this, expressions=args) 3329 3330 self._match_r_paren(this) 3331 return self._parse_window(this) 3332 3333 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3334 return self._parse_column_def(self._parse_id_var()) 3335 3336 def _parse_user_defined_function( 3337 self, kind: t.Optional[TokenType] = None 3338 ) -> t.Optional[exp.Expression]: 3339 this = self._parse_id_var() 3340 3341 while self._match(TokenType.DOT): 3342 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3343 3344 if not self._match(TokenType.L_PAREN): 3345 return this 3346 3347 expressions = self._parse_csv(self._parse_function_parameter) 3348 self._match_r_paren() 3349 return self.expression( 3350 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3351 ) 3352 3353 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 3354 literal = self._parse_primary() 3355 if literal: 3356 return self.expression(exp.Introducer, this=token.text, expression=literal) 3357 3358 return self.expression(exp.Identifier, this=token.text) 3359 3360 def _parse_session_parameter(self) -> exp.SessionParameter: 3361 kind = None 3362 this = self._parse_id_var() or self._parse_primary() 3363 3364 if this and self._match(TokenType.DOT): 3365 kind = this.name 3366 this = self._parse_var() or self._parse_primary() 3367 3368 return self.expression(exp.SessionParameter, this=this, kind=kind) 3369 3370 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 3371 index = self._index 3372 3373 if self._match(TokenType.L_PAREN): 3374 expressions = self._parse_csv(self._parse_id_var) 3375 3376 if not 
self._match(TokenType.R_PAREN): 3377 self._retreat(index) 3378 else: 3379 expressions = [self._parse_id_var()] 3380 3381 if self._match_set(self.LAMBDAS): 3382 return self.LAMBDAS[self._prev.token_type](self, expressions) 3383 3384 self._retreat(index) 3385 3386 this: t.Optional[exp.Expression] 3387 3388 if self._match(TokenType.DISTINCT): 3389 this = self.expression( 3390 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3391 ) 3392 else: 3393 this = self._parse_select_or_expression(alias=alias) 3394 3395 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3396 3397 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3398 index = self._index 3399 3400 if not self.errors: 3401 try: 3402 if self._parse_select(nested=True): 3403 return this 3404 except ParseError: 3405 pass 3406 finally: 3407 self.errors.clear() 3408 self._retreat(index) 3409 3410 if not self._match(TokenType.L_PAREN): 3411 return this 3412 3413 args = self._parse_csv( 3414 lambda: self._parse_constraint() 3415 or self._parse_column_def(self._parse_field(any_token=True)) 3416 ) 3417 3418 self._match_r_paren() 3419 return self.expression(exp.Schema, this=this, expressions=args) 3420 3421 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3422 # column defs are not really columns, they're identifiers 3423 if isinstance(this, exp.Column): 3424 this = this.this 3425 3426 kind = self._parse_types(schema=True) 3427 3428 if self._match_text_seq("FOR", "ORDINALITY"): 3429 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3430 3431 constraints = [] 3432 while True: 3433 constraint = self._parse_column_constraint() 3434 if not constraint: 3435 break 3436 constraints.append(constraint) 3437 3438 if not kind and not constraints: 3439 return this 3440 3441 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3442 3443 def 
_parse_auto_increment( 3444 self, 3445 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3446 start = None 3447 increment = None 3448 3449 if self._match(TokenType.L_PAREN, advance=False): 3450 args = self._parse_wrapped_csv(self._parse_bitwise) 3451 start = seq_get(args, 0) 3452 increment = seq_get(args, 1) 3453 elif self._match_text_seq("START"): 3454 start = self._parse_bitwise() 3455 self._match_text_seq("INCREMENT") 3456 increment = self._parse_bitwise() 3457 3458 if start and increment: 3459 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3460 3461 return exp.AutoIncrementColumnConstraint() 3462 3463 def _parse_compress(self) -> exp.CompressColumnConstraint: 3464 if self._match(TokenType.L_PAREN, advance=False): 3465 return self.expression( 3466 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3467 ) 3468 3469 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3470 3471 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3472 if self._match_text_seq("BY", "DEFAULT"): 3473 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3474 this = self.expression( 3475 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3476 ) 3477 else: 3478 self._match_text_seq("ALWAYS") 3479 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3480 3481 self._match(TokenType.ALIAS) 3482 identity = self._match_text_seq("IDENTITY") 3483 3484 if self._match(TokenType.L_PAREN): 3485 if self._match_text_seq("START", "WITH"): 3486 this.set("start", self._parse_bitwise()) 3487 if self._match_text_seq("INCREMENT", "BY"): 3488 this.set("increment", self._parse_bitwise()) 3489 if self._match_text_seq("MINVALUE"): 3490 this.set("minvalue", self._parse_bitwise()) 3491 if self._match_text_seq("MAXVALUE"): 3492 this.set("maxvalue", self._parse_bitwise()) 3493 3494 if self._match_text_seq("CYCLE"): 3495 
this.set("cycle", True) 3496 elif self._match_text_seq("NO", "CYCLE"): 3497 this.set("cycle", False) 3498 3499 if not identity: 3500 this.set("expression", self._parse_bitwise()) 3501 3502 self._match_r_paren() 3503 3504 return this 3505 3506 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3507 self._match_text_seq("LENGTH") 3508 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3509 3510 def _parse_not_constraint( 3511 self, 3512 ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]: 3513 if self._match_text_seq("NULL"): 3514 return self.expression(exp.NotNullColumnConstraint) 3515 if self._match_text_seq("CASESPECIFIC"): 3516 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3517 return None 3518 3519 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3520 if self._match(TokenType.CONSTRAINT): 3521 this = self._parse_id_var() 3522 else: 3523 this = None 3524 3525 if self._match_texts(self.CONSTRAINT_PARSERS): 3526 return self.expression( 3527 exp.ColumnConstraint, 3528 this=this, 3529 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3530 ) 3531 3532 return this 3533 3534 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3535 if not self._match(TokenType.CONSTRAINT): 3536 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3537 3538 this = self._parse_id_var() 3539 expressions = [] 3540 3541 while True: 3542 constraint = self._parse_unnamed_constraint() or self._parse_function() 3543 if not constraint: 3544 break 3545 expressions.append(constraint) 3546 3547 return self.expression(exp.Constraint, this=this, expressions=expressions) 3548 3549 def _parse_unnamed_constraint( 3550 self, constraints: t.Optional[t.Collection[str]] = None 3551 ) -> t.Optional[exp.Expression]: 3552 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3553 return None 3554 3555 constraint = self._prev.text.upper() 3556 
if constraint not in self.CONSTRAINT_PARSERS: 3557 self.raise_error(f"No parser found for schema constraint {constraint}.") 3558 3559 return self.CONSTRAINT_PARSERS[constraint](self) 3560 3561 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3562 self._match_text_seq("KEY") 3563 return self.expression( 3564 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3565 ) 3566 3567 def _parse_key_constraint_options(self) -> t.List[str]: 3568 options = [] 3569 while True: 3570 if not self._curr: 3571 break 3572 3573 if self._match(TokenType.ON): 3574 action = None 3575 on = self._advance_any() and self._prev.text 3576 3577 if self._match_text_seq("NO", "ACTION"): 3578 action = "NO ACTION" 3579 elif self._match_text_seq("CASCADE"): 3580 action = "CASCADE" 3581 elif self._match_pair(TokenType.SET, TokenType.NULL): 3582 action = "SET NULL" 3583 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3584 action = "SET DEFAULT" 3585 else: 3586 self.raise_error("Invalid key constraint") 3587 3588 options.append(f"ON {on} {action}") 3589 elif self._match_text_seq("NOT", "ENFORCED"): 3590 options.append("NOT ENFORCED") 3591 elif self._match_text_seq("DEFERRABLE"): 3592 options.append("DEFERRABLE") 3593 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3594 options.append("INITIALLY DEFERRED") 3595 elif self._match_text_seq("NORELY"): 3596 options.append("NORELY") 3597 elif self._match_text_seq("MATCH", "FULL"): 3598 options.append("MATCH FULL") 3599 else: 3600 break 3601 3602 return options 3603 3604 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3605 if match and not self._match(TokenType.REFERENCES): 3606 return None 3607 3608 expressions = None 3609 this = self._parse_table(schema=True) 3610 options = self._parse_key_constraint_options() 3611 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3612 3613 def _parse_foreign_key(self) -> exp.ForeignKey: 3614 
expressions = self._parse_wrapped_id_vars() 3615 reference = self._parse_references() 3616 options = {} 3617 3618 while self._match(TokenType.ON): 3619 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3620 self.raise_error("Expected DELETE or UPDATE") 3621 3622 kind = self._prev.text.lower() 3623 3624 if self._match_text_seq("NO", "ACTION"): 3625 action = "NO ACTION" 3626 elif self._match(TokenType.SET): 3627 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3628 action = "SET " + self._prev.text.upper() 3629 else: 3630 self._advance() 3631 action = self._prev.text.upper() 3632 3633 options[kind] = action 3634 3635 return self.expression( 3636 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3637 ) 3638 3639 def _parse_primary_key( 3640 self, wrapped_optional: bool = False, in_props: bool = False 3641 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3642 desc = ( 3643 self._match_set((TokenType.ASC, TokenType.DESC)) 3644 and self._prev.token_type == TokenType.DESC 3645 ) 3646 3647 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3648 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3649 3650 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3651 options = self._parse_key_constraint_options() 3652 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3653 3654 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3655 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3656 return this 3657 3658 bracket_kind = self._prev.token_type 3659 3660 if self._match(TokenType.COLON): 3661 expressions: t.List[t.Optional[exp.Expression]] = [ 3662 self.expression(exp.Slice, expression=self._parse_conjunction()) 3663 ] 3664 else: 3665 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3666 3667 # 
https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3668 if bracket_kind == TokenType.L_BRACE: 3669 this = self.expression(exp.Struct, expressions=expressions) 3670 elif not this or this.name.upper() == "ARRAY": 3671 this = self.expression(exp.Array, expressions=expressions) 3672 else: 3673 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3674 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3675 3676 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3677 self.raise_error("Expected ]") 3678 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3679 self.raise_error("Expected }") 3680 3681 self._add_comments(this) 3682 return self._parse_bracket(this) 3683 3684 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3685 if self._match(TokenType.COLON): 3686 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3687 return this 3688 3689 def _parse_case(self) -> t.Optional[exp.Expression]: 3690 ifs = [] 3691 default = None 3692 3693 expression = self._parse_conjunction() 3694 3695 while self._match(TokenType.WHEN): 3696 this = self._parse_conjunction() 3697 self._match(TokenType.THEN) 3698 then = self._parse_conjunction() 3699 ifs.append(self.expression(exp.If, this=this, true=then)) 3700 3701 if self._match(TokenType.ELSE): 3702 default = self._parse_conjunction() 3703 3704 if not self._match(TokenType.END): 3705 self.raise_error("Expected END after CASE", self._prev) 3706 3707 return self._parse_window( 3708 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3709 ) 3710 3711 def _parse_if(self) -> t.Optional[exp.Expression]: 3712 if self._match(TokenType.L_PAREN): 3713 args = self._parse_csv(self._parse_conjunction) 3714 this = self.validate_expression(exp.If.from_arg_list(args), args) 3715 self._match_r_paren() 3716 else: 3717 index = self._index - 1 3718 condition = 
self._parse_conjunction() 3719 3720 if not condition: 3721 self._retreat(index) 3722 return None 3723 3724 self._match(TokenType.THEN) 3725 true = self._parse_conjunction() 3726 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3727 self._match(TokenType.END) 3728 this = self.expression(exp.If, this=condition, true=true, false=false) 3729 3730 return self._parse_window(this) 3731 3732 def _parse_extract(self) -> exp.Extract: 3733 this = self._parse_function() or self._parse_var() or self._parse_type() 3734 3735 if self._match(TokenType.FROM): 3736 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3737 3738 if not self._match(TokenType.COMMA): 3739 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3740 3741 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3742 3743 def _parse_any_value(self) -> exp.AnyValue: 3744 this = self._parse_lambda() 3745 is_max = None 3746 having = None 3747 3748 if self._match(TokenType.HAVING): 3749 self._match_texts(("MAX", "MIN")) 3750 is_max = self._prev.text == "MAX" 3751 having = self._parse_column() 3752 3753 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3754 3755 def _parse_cast(self, strict: bool) -> exp.Expression: 3756 this = self._parse_conjunction() 3757 3758 if not self._match(TokenType.ALIAS): 3759 if self._match(TokenType.COMMA): 3760 return self.expression( 3761 exp.CastToStrType, this=this, expression=self._parse_string() 3762 ) 3763 else: 3764 self.raise_error("Expected AS after CAST") 3765 3766 fmt = None 3767 to = self._parse_types() 3768 3769 if not to: 3770 self.raise_error("Expected TYPE after CAST") 3771 elif to.this == exp.DataType.Type.CHAR: 3772 if self._match(TokenType.CHARACTER_SET): 3773 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3774 elif self._match(TokenType.FORMAT): 3775 fmt_string = self._parse_string() 3776 fmt = 
self._parse_at_time_zone(fmt_string) 3777 3778 if to.this in exp.DataType.TEMPORAL_TYPES: 3779 this = self.expression( 3780 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3781 this=this, 3782 format=exp.Literal.string( 3783 format_time( 3784 fmt_string.this if fmt_string else "", 3785 self.FORMAT_MAPPING or self.TIME_MAPPING, 3786 self.FORMAT_TRIE or self.TIME_TRIE, 3787 ) 3788 ), 3789 ) 3790 3791 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3792 this.set("zone", fmt.args["zone"]) 3793 3794 return this 3795 3796 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3797 3798 def _parse_concat(self) -> t.Optional[exp.Expression]: 3799 args = self._parse_csv(self._parse_conjunction) 3800 if self.CONCAT_NULL_OUTPUTS_STRING: 3801 args = [ 3802 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3803 for arg in args 3804 if arg 3805 ] 3806 3807 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3808 # we find such a call we replace it with its argument. 
3809 if len(args) == 1: 3810 return args[0] 3811 3812 return self.expression( 3813 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3814 ) 3815 3816 def _parse_string_agg(self) -> exp.Expression: 3817 if self._match(TokenType.DISTINCT): 3818 args: t.List[t.Optional[exp.Expression]] = [ 3819 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3820 ] 3821 if self._match(TokenType.COMMA): 3822 args.extend(self._parse_csv(self._parse_conjunction)) 3823 else: 3824 args = self._parse_csv(self._parse_conjunction) 3825 3826 index = self._index 3827 if not self._match(TokenType.R_PAREN): 3828 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3829 return self.expression( 3830 exp.GroupConcat, 3831 this=seq_get(args, 0), 3832 separator=self._parse_order(this=seq_get(args, 1)), 3833 ) 3834 3835 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3836 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3837 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3838 if not self._match_text_seq("WITHIN", "GROUP"): 3839 self._retreat(index) 3840 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3841 3842 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3843 order = self._parse_order(this=seq_get(args, 0)) 3844 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3845 3846 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3847 this = self._parse_bitwise() 3848 3849 if self._match(TokenType.USING): 3850 to: t.Optional[exp.Expression] = self.expression( 3851 exp.CharacterSet, this=self._parse_var() 3852 ) 3853 elif self._match(TokenType.COMMA): 3854 to = self._parse_types() 3855 else: 3856 to = None 3857 3858 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3859 3860 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3861 """ 3862 There are generally two variants of the DECODE function: 3863 3864 - DECODE(bin, charset) 3865 - DECODE(expression, search, result [, search, result] ... [, default]) 3866 3867 The second variant will always be parsed into a CASE expression. Note that NULL 3868 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3869 instead of relying on pattern matching. 
3870 """ 3871 args = self._parse_csv(self._parse_conjunction) 3872 3873 if len(args) < 3: 3874 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3875 3876 expression, *expressions = args 3877 if not expression: 3878 return None 3879 3880 ifs = [] 3881 for search, result in zip(expressions[::2], expressions[1::2]): 3882 if not search or not result: 3883 return None 3884 3885 if isinstance(search, exp.Literal): 3886 ifs.append( 3887 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3888 ) 3889 elif isinstance(search, exp.Null): 3890 ifs.append( 3891 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3892 ) 3893 else: 3894 cond = exp.or_( 3895 exp.EQ(this=expression.copy(), expression=search), 3896 exp.and_( 3897 exp.Is(this=expression.copy(), expression=exp.Null()), 3898 exp.Is(this=search.copy(), expression=exp.Null()), 3899 copy=False, 3900 ), 3901 copy=False, 3902 ) 3903 ifs.append(exp.If(this=cond, true=result)) 3904 3905 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3906 3907 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3908 self._match_text_seq("KEY") 3909 key = self._parse_field() 3910 self._match(TokenType.COLON) 3911 self._match_text_seq("VALUE") 3912 value = self._parse_field() 3913 3914 if not key and not value: 3915 return None 3916 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3917 3918 def _parse_json_object(self) -> exp.JSONObject: 3919 star = self._parse_star() 3920 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3921 3922 null_handling = None 3923 if self._match_text_seq("NULL", "ON", "NULL"): 3924 null_handling = "NULL ON NULL" 3925 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3926 null_handling = "ABSENT ON NULL" 3927 3928 unique_keys = None 3929 if self._match_text_seq("WITH", "UNIQUE"): 3930 unique_keys = True 3931 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3932 unique_keys = False 3933 3934 self._match_text_seq("KEYS") 3935 3936 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3937 format_json = self._match_text_seq("FORMAT", "JSON") 3938 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3939 3940 return self.expression( 3941 exp.JSONObject, 3942 expressions=expressions, 3943 null_handling=null_handling, 3944 unique_keys=unique_keys, 3945 return_type=return_type, 3946 format_json=format_json, 3947 encoding=encoding, 3948 ) 3949 3950 def _parse_logarithm(self) -> exp.Func: 3951 # Default argument order is base, expression 3952 args = self._parse_csv(self._parse_range) 3953 3954 if len(args) > 1: 3955 if not self.LOG_BASE_FIRST: 3956 args.reverse() 3957 return exp.Log.from_arg_list(args) 3958 3959 return self.expression( 3960 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3961 ) 3962 3963 def _parse_match_against(self) -> exp.MatchAgainst: 3964 expressions = self._parse_csv(self._parse_column) 3965 3966 self._match_text_seq(")", "AGAINST", "(") 3967 3968 this = self._parse_string() 3969 3970 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3971 modifier = "IN NATURAL LANGUAGE MODE" 3972 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3973 modifier = f"{modifier} WITH QUERY EXPANSION" 3974 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3975 modifier = "IN BOOLEAN MODE" 3976 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3977 modifier = "WITH QUERY EXPANSION" 3978 else: 3979 modifier = None 3980 3981 return self.expression( 3982 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3983 ) 3984 3985 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3986 def _parse_open_json(self) -> exp.OpenJSON: 3987 this = self._parse_bitwise() 3988 path = self._match(TokenType.COMMA) and self._parse_string() 3989 3990 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef:
        # One column definition, parsed in order: a field, a type, an optional
        # string (used as the path) and an optional ALIAS + JSON token pair.
        this = self._parse_field(any_token=True)
        kind = self._parse_types()
        path = self._parse_string()
        as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

        return self.expression(
            exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
        )

    expressions = None
    # Column definitions are only parsed when ")" is immediately followed by WITH.
    if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
        self._match_l_paren()
        expressions = self._parse_csv(_parse_open_json_column_def)

    return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
    """
    Parses the arguments of a string-position function into an `exp.StrPosition`.

    Two forms are handled: `<substr> IN <string>` and a comma-separated argument list
    with an optional third (position) argument.

    Args:
        haystack_first: For the comma-separated form, whether the first argument is the
            string being searched. Otherwise the first argument is the substring.

    Returns:
        The StrPosition expression.
    """
    args = self._parse_csv(self._parse_bitwise)

    # `<substr> IN <string>`: the first parsed argument is the substring.
    if self._match(TokenType.IN):
        return self.expression(
            exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
        )

    if haystack_first:
        haystack = seq_get(args, 0)
        needle = seq_get(args, 1)
    else:
        needle = seq_get(args, 0)
        haystack = seq_get(args, 1)

    return self.expression(
        exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
    )

def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
    """Parses a comma-separated list of tables into a JoinHint named `func_name` (upper-cased)."""
    args = self._parse_csv(self._parse_table)
    return exp.JoinHint(this=func_name.upper(), expressions=args)

def _parse_substring(self) -> exp.Substring:
    """Parses the arguments of a substring call, including the FROM / FOR keyword form."""
    # Postgres supports the form: substring(string [from int] [for int])
    # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

    args = self._parse_csv(self._parse_bitwise)

    # The keyword operands are appended to the positional argument list.
    if self._match(TokenType.FROM):
        args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            args.append(self._parse_bitwise())

    return self.validate_expression(exp.Substring.from_arg_list(args), args)

def _parse_trim(self) -> exp.Trim:
    """Parses the arguments of a trim call: optional trim type, operands and collation."""
    # https://www.w3resource.com/sql/character-functions/trim.php
    # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

    position = None
    collation = None

    if self._match_texts(self.TRIM_TYPES):
        position = self._prev.text.upper()

    expression = self._parse_bitwise()
    if self._match_set((TokenType.FROM, TokenType.COMMA)):
        # Two operands: the first one parsed is `expression`, the second is `this`.
        this = self._parse_bitwise()
    else:
        # Single operand: it is the value being trimmed.
        this = expression
        expression = None

    if self._match(TokenType.COLLATE):
        collation = self._parse_bitwise()

    return self.expression(
        exp.Trim, this=this, position=position, expression=expression, collation=collation
    )

def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parses a comma-separated list of named windows if the WINDOW token is matched."""
    return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

def _parse_named_window(self) -> t.Optional[exp.Expression]:
    """Parses a single named window: an identifier followed by an aliased window definition."""
    return self._parse_window(self._parse_id_var(), alias=True)

def _parse_respect_or_ignore_nulls(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    """
    Wraps `this` in IgnoreNulls / RespectNulls if `IGNORE NULLS` / `RESPECT NULLS` follows.

    Returns:
        The wrapped expression, or `this` unchanged when neither sequence matches.
    """
    if self._match_text_seq("IGNORE", "NULLS"):
        return self.expression(exp.IgnoreNulls, this=this)
    if self._match_text_seq("RESPECT", "NULLS"):
        return self.expression(exp.RespectNulls, this=this)
    return this

def _parse_window(
    self, this: t.Optional[exp.Expression], alias: bool = False
) -> t.Optional[exp.Expression]:
    """
    Parses the optional FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS and window parts
    that may follow `this`.

    Args:
        this: The expression the window applies to.
        alias: Whether a named window definition is being parsed. In that case an optional
            ALIAS token is consumed instead of requiring one of WINDOW_BEFORE_PAREN_TOKENS.

    Returns:
        A Window expression, or `this` (possibly wrapped by the earlier clauses) when no
        window follows.
    """
    if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
        this = self.expression(exp.Filter, this=this, expression=self._parse_where())
        self._match_r_paren()

    # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
    if self._match_text_seq("WITHIN", "GROUP"):
        order = self._parse_wrapped(self._parse_order)
        this = self.expression(exp.WithinGroup, this=this, expression=order)

    # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
    # Some dialects choose to implement and some do not.
    # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

    # There is some code above in _parse_lambda that handles
    # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

    # The below changes handle
    # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

    # Oracle allows both formats
    # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
    # and Snowflake chose to do the same for familiarity
    # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
    this = self._parse_respect_or_ignore_nulls(this)

    # bigquery select from window x AS (partition by ...)
    if alias:
        over = None
        self._match(TokenType.ALIAS)
    elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
        return this
    else:
        over = self._prev.text.upper()

    # No parenthesis: the window is referenced by name only.
    if not self._match(TokenType.L_PAREN):
        return self.expression(
            exp.Window, this=this, alias=self._parse_id_var(False), over=over
        )

    window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

    # `first` ends up truthy for FIRST, False for LAST, and None when neither is present.
    first = self._match(TokenType.FIRST)
    if self._match_text_seq("LAST"):
        first = False

    partition = self._parse_partition_by()
    order = self._parse_order()
    kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

    if kind:
        self._match(TokenType.BETWEEN)
        start = self._parse_window_spec()
        self._match(TokenType.AND)
        end = self._parse_window_spec()

        spec = self.expression(
            exp.WindowSpec,
            kind=kind,
            start=start["value"],
            start_side=start["side"],
            end=end["value"],
            end_side=end["side"],
        )
    else:
        spec = None

    self._match_r_paren()

    return self.expression(
        exp.Window,
        this=this,
        partition_by=partition,
        order=order,
        spec=spec,
        alias=window_alias,
        over=over,
        first=first,
    )

def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
    """
    Parses one boundary of a window frame.

    Returns:
        A dict with a "value" key ("UNBOUNDED", "CURRENT ROW" or a parsed expression) and a
        "side" key (the matched WINDOW_SIDES text, if any).
    """
    self._match(TokenType.BETWEEN)

    return {
        "value": (
            (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
            or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
            or self._parse_bitwise()
        ),
        "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
    }

def _parse_alias(
    self, this: t.Optional[exp.Expression], explicit: bool = False
) -> t.Optional[exp.Expression]:
    """
    Parses an optional alias (or parenthesized alias list) for `this`.

    Args:
        this: The expression being aliased.
        explicit: If True, an alias is only parsed when the ALIAS token was matched.

    Returns:
        An Alias or Aliases expression, or `this` unchanged if no alias was parsed.
    """
    # The match result doubles as the `any_token` flag for the identifier parse below.
    any_token = self._match(TokenType.ALIAS)

    if explicit and not any_token:
        return this

    if self._match(TokenType.L_PAREN):
        aliases = self.expression(
            exp.Aliases,
            this=this,
            expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
        )
        self._match_r_paren(aliases)
        return aliases

    alias = self._parse_id_var(any_token)

    if alias:
        return self.expression(exp.Alias, this=this, alias=alias)

    return this

def _parse_id_var(
    self,
    any_token: bool = True,
    tokens: t.Optional[t.Collection[TokenType]] = None,
) -> t.Optional[exp.Expression]:
    """
    Parses an identifier, falling back to treating the next token's text as one.

    Args:
        any_token: Whether any token accepted by `_advance_any` may be used as the identifier.
        tokens: Token types accepted as identifiers; defaults to `ID_VAR_TOKENS`.

    Returns:
        The identifier expression, or None if nothing matched.
    """
    identifier = self._parse_identifier()

    if identifier:
        return identifier

    if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
        # Identifiers built from string tokens are marked as quoted.
        quoted = self._prev.token_type == TokenType.STRING
        return exp.Identifier(this=self._prev.text, quoted=quoted)

    return None

def _parse_string(self) -> t.Optional[exp.Expression]:
    """Parses a STRING token via its primary parser, otherwise tries a placeholder."""
    if self._match(TokenType.STRING):
        return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
    return self._parse_placeholder()

def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
    """Converts a matched STRING token's text into a quoted identifier."""
    return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

def _parse_number(self) -> t.Optional[exp.Expression]:
    """Parses a NUMBER token via its primary parser, otherwise tries a placeholder."""
    if self._match(TokenType.NUMBER):
        return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
    return self._parse_placeholder()

def _parse_identifier(self) -> t.Optional[exp.Expression]:
    """Parses an IDENTIFIER token into a quoted Identifier, otherwise tries a placeholder."""
    if self._match(TokenType.IDENTIFIER):
        return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
    return self._parse_placeholder()

def _parse_var(
    self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
) -> t.Optional[exp.Expression]:
    """
    Parses a Var expression from the next token, otherwise tries a placeholder.

    Args:
        any_token: Whether any token accepted by `_advance_any` may be used.
        tokens: Additional token types accepted besides VAR.
    """
    if (
        (any_token and self._advance_any())
        or self._match(TokenType.VAR)
        or (self._match_set(tokens) if tokens else False)
    ):
        return self.expression(exp.Var, this=self._prev.text)
    return self._parse_placeholder()

def _advance_any(self) -> t.Optional[Token]:
    """Advances past the current token unless it is a reserved keyword; returns it or None."""
    if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
        self._advance()
        return self._prev
    return None

def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
    """Parses a Var, falling back to a string."""
    return self._parse_var() or self._parse_string()

def _parse_null(self) -> t.Optional[exp.Expression]:
    """Parses a NULL token via its primary parser; returns None if it is not matched."""
    if self._match(TokenType.NULL):
        return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
    return None

def _parse_boolean(self) -> t.Optional[exp.Expression]:
    """Parses a TRUE or FALSE token via its primary parser; returns None otherwise."""
    if self._match(TokenType.TRUE):
        return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
    if self._match(TokenType.FALSE):
        return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
    return None

def _parse_star(self) -> t.Optional[exp.Expression]:
    """Parses a STAR token via its primary parser; returns None if it is not matched."""
    if self._match(TokenType.STAR):
        return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
    return None

def _parse_parameter(self) -> exp.Parameter:
    """Parses a parameter, optionally wrapped in braces, into a Parameter expression."""
    wrapped = self._match(TokenType.L_BRACE)
    this = self._parse_var() or self._parse_identifier() or self._parse_primary()
    # The closing brace is consumed if present; its absence is not reported here.
    self._match(TokenType.R_BRACE)
    return self.expression(exp.Parameter, this=this, wrapped=wrapped)

def _parse_placeholder(self) -> t.Optional[exp.Expression]:
    """Parses a placeholder using PLACEHOLDER_PARSERS; returns None if none applies."""
    if self._match_set(self.PLACEHOLDER_PARSERS):
        placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
        if placeholder:
            return placeholder
        # The sub-parser produced nothing, so step back over the token matched above.
        self._advance(-1)
    return None

def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parses the column list following EXCEPT, with or without parentheses."""
    if not self._match(TokenType.EXCEPT):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_column)
    return self._parse_csv(self._parse_column)

def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
    """Parses the expression list following REPLACE, with or without parentheses."""
    if not self._match(TokenType.REPLACE):
        return None
    if self._match(TokenType.L_PAREN, advance=False):
        return self._parse_wrapped_csv(self._parse_expression)
    return self._parse_expressions()

def _parse_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a `sep`-separated sequence of items.

    Args:
        parse_method: Callable (taking no arguments) that parses one item.
        sep: The separator token type.

    Returns:
        The parsed items; results that are None are left out.
    """
    parse_result = parse_method()
    items = [parse_result] if parse_result is not None else []

    while self._match(sep):
        # The previously parsed item is handed to `_add_comments` after each separator.
        self._add_comments(parse_result)
        parse_result = parse_method()
        if parse_result is not None:
            items.append(parse_result)

    return items

def _parse_tokens(
    self, parse_method: t.Callable, expressions: t.Dict
) -> t.Optional[exp.Expression]:
    """
    Parses a chain of operands joined by the token types in `expressions`.

    Args:
        parse_method: Callable that parses one operand.
        expressions: Mapping from operator token type to the expression class to build.

    Returns:
        The combined expression; each matched operator wraps the result so far as `this`.
    """
    this = parse_method()

    while self._match_set(expressions):
        this = self.expression(
            expressions[self._prev.token_type],
            this=this,
            comments=self._prev_comments,
            expression=parse_method(),
        )

    return this

def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
    """Parses a parenthesized, comma-separated list of identifiers."""
    return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

def _parse_wrapped_csv(
    self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
) -> t.List[t.Optional[exp.Expression]]:
    """Parses a parenthesized, `sep`-separated list of items produced by `parse_method`."""
    return self._parse_wrapped(
        lambda: self._parse_csv(parse_method, sep=sep), optional=optional
    )

def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
    """
    Calls `parse_method` between an opening and a closing parenthesis.

    Args:
        parse_method: Callable that parses the wrapped content.
        optional: Whether the parentheses may be omitted. If they are required but
            missing, "Expecting (" is reported through `raise_error`.

    Returns:
        Whatever `parse_method` returns.
    """
    wrapped = self._match(TokenType.L_PAREN)
    if not wrapped and not optional:
        self.raise_error("Expecting (")
    parse_result = parse_method()
    # A closing parenthesis is only required when an opening one was consumed.
    if wrapped:
        self._match_r_paren()
    return parse_result

def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
    """Parses a comma-separated list of expressions."""
    return self._parse_csv(self._parse_expression)
def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
    """
    Parses a SELECT, or otherwise an expression optionally followed by set operations.

    Args:
        alias: Whether the fallback is parsed with `_parse_expression` instead of
            `_parse_conjunction`.
    """
    return self._parse_select() or self._parse_set_operations(
        self._parse_expression() if alias else self._parse_conjunction()
    )

def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
    """Parses a nested SELECT (without subquery alias), its set operations and query modifiers."""
    return self._parse_query_modifiers(
        self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
    )

def _parse_transaction(self) -> exp.Transaction:
    """Parses the optional kind and the comma-separated modes of a transaction statement."""
    this = None
    if self._match_texts(self.TRANSACTION_KIND):
        this = self._prev.text

    self._match_texts({"TRANSACTION", "WORK"})

    modes = []
    while True:
        # Each mode is a run of consecutive VAR tokens, joined with single spaces.
        mode = []
        while self._match(TokenType.VAR):
            mode.append(self._prev.text)

        if mode:
            modes.append(" ".join(mode))
        if not self._match(TokenType.COMMA):
            break

    return self.expression(exp.Transaction, this=this, modes=modes)

def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
    """
    Parses the remainder of a commit or rollback statement.

    The previously consumed token decides which of the two expressions is produced.
    """
    chain = None
    savepoint = None
    is_rollback = self._prev.token_type == TokenType.ROLLBACK

    self._match_texts({"TRANSACTION", "WORK"})

    if self._match_text_seq("TO"):
        self._match_text_seq("SAVEPOINT")
        savepoint = self._parse_id_var()

    if self._match(TokenType.AND):
        # AND [NO] CHAIN: `chain` is False when NO is present, True otherwise.
        chain = not self._match_text_seq("NO")
        self._match_text_seq("CHAIN")

    if is_rollback:
        return self.expression(exp.Rollback, savepoint=savepoint)

    return self.expression(exp.Commit, chain=chain)

def _parse_add_column(self) -> t.Optional[exp.Expression]:
    """Parses `ADD [COLUMN] ...` into a column definition; returns None if ADD is absent."""
    if not self._match_text_seq("ADD"):
        return None

    self._match(TokenType.COLUMN)
    exists_column = self._parse_exists(not_=True)
    expression = self._parse_column_def(self._parse_field(any_token=True))

    if expression:
        expression.set("exists", exists_column)

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

    return expression

def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
    """Parses a DROP action; a resulting Drop without a "kind" arg gets kind "COLUMN"."""
    drop = self._match(TokenType.DROP) and self._parse_drop()
    if drop and not isinstance(drop, exp.Command):
        drop.set("kind", drop.args.get("kind", "COLUMN"))
    return drop

# https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
    """Parses a comma-separated list of partitions into a DropPartition expression."""
    return self.expression(
        exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
    )

def _parse_add_constraint(self) -> exp.AddConstraint:
    """
    Parses a constraint being added; the previously consumed token selects the kind.

    Handles an optional constraint name, CHECK (...) [ENFORCED], foreign keys and
    primary keys.
    """
    this = None
    kind = self._prev.token_type

    if kind == TokenType.CONSTRAINT:
        this = self._parse_id_var()

        if self._match_text_seq("CHECK"):
            expression = self._parse_wrapped(self._parse_conjunction)
            enforced = self._match_text_seq("ENFORCED")

            return self.expression(
                exp.AddConstraint, this=this, expression=expression, enforced=enforced
            )

    if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
        expression = self._parse_foreign_key()
    elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
        expression = self._parse_primary_key()
    else:
        expression = None

    return self.expression(exp.AddConstraint, this=this, expression=expression)

def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
    """Parses the ADD action of an ALTER TABLE: either constraints or columns."""
    # Position of the token before the current one, so the column path can re-read it.
    index = self._index - 1

    if self._match_set(self.ADD_CONSTRAINT_TOKENS):
        return self._parse_csv(self._parse_add_constraint)

    self._retreat(index)
    return self._parse_csv(self._parse_add_column)

def _parse_alter_table_alter(self) -> exp.AlterColumn:
    """
    Parses the ALTER [COLUMN] action of an ALTER TABLE.

    Handles DROP DEFAULT, SET DEFAULT <expr>, and [SET DATA] TYPE with optional
    COLLATE / USING parts.
    """
    self._match(TokenType.COLUMN)
    column = self._parse_field(any_token=True)

    if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, drop=True)
    if self._match_pair(TokenType.SET, TokenType.DEFAULT):
        return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

    self._match_text_seq("SET", "DATA")
    return self.expression(
        exp.AlterColumn,
        this=column,
        dtype=self._match_text_seq("TYPE") and self._parse_types(),
        collate=self._match(TokenType.COLLATE) and self._parse_term(),
        using=self._match(TokenType.USING) and self._parse_conjunction(),
    )

def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
    """Parses the DROP action of an ALTER TABLE: either partitions or columns."""
    # Position of the token before the current one, so the column path can re-read it.
    index = self._index - 1

    partition_exists = self._parse_exists()
    if self._match(TokenType.PARTITION, advance=False):
        return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

    self._retreat(index)
    return self._parse_csv(self._parse_drop_column)

def _parse_alter_table_rename(self) -> exp.RenameTable:
    """Parses `[TO] <table>` into a RenameTable expression."""
    self._match_text_seq("TO")
    return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

def _parse_alter(self) -> exp.AlterTable | exp.Command:
    """
    Parses an ALTER statement.

    Returns:
        An AlterTable expression when the statement targets a table, an entry in
        ALTER_PARSERS handles the action and all tokens are consumed; otherwise a
        generic Command built from the statement text.
    """
    start = self._prev

    if not self._match(TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    this = self._parse_table(schema=True)

    if self._next:
        self._advance()

    parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
    if parser:
        actions = ensure_list(parser(self))

        # Leftover tokens mean the action was not fully understood.
        if not self._curr:
            return self.expression(
                exp.AlterTable,
                this=this,
                exists=exists,
                actions=actions,
            )
    return self._parse_as_command(start)

def _parse_merge(self) -> exp.Merge:
    """Parses a MERGE statement: target, USING source, ON condition and WHEN clauses."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    self._match(TokenType.USING)
    using = self._parse_table()

    self._match(TokenType.ON)
    on = self._parse_conjunction()

    whens = []
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # False for BY TARGET, otherwise the result of matching BY SOURCE.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_conjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            _this = self._parse_star()
            if _this:
                then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
            else:
                then = self.expression(
                    exp.Insert,
                    this=self._parse_value(),
                    expression=self._match(TokenType.VALUES) and self._parse_value(),
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update, expressions=expressions)
            else:
                then = self.expression(
                    exp.Update,
                    expressions=self._match(TokenType.SET)
                    and self._parse_csv(self._parse_equality),
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var, this=self._prev.text)
        else:
            then = None

        whens.append(
            self.expression(
                exp.When,
                matched=matched,
                source=source,
                condition=condition,
                then=then,
            )
        )

    return self.expression(
        exp.Merge,
        this=target,
        using=using,
        on=on,
        expressions=whens,
    )

def _parse_show(self) -> t.Optional[exp.Expression]:
    """Parses a SHOW statement via SHOW_PARSERS, else builds a Show from the next token."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    self._advance()
    return self.expression(exp.Show, this=self._prev.text.upper())

def _parse_set_item_assignment(
    self, kind: t.Optional[str] = None
) -> t.Optional[exp.Expression]:
    """
    Parses a `<left> = <right>` or `<left> TO <right>` assignment into a SetItem.

    Args:
        kind: The kind stored on the SetItem. For "GLOBAL" / "SESSION" followed by
            TRANSACTION, parsing is delegated to `_parse_set_transaction`.

    Returns:
        The SetItem, or None (with the token position restored) if no assignment
        operator follows the left-hand side.
    """
    index = self._index

    if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_id_var()

    if not self._match_texts(("=", "TO")):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    this = self.expression(exp.EQ, this=left, expression=right)

    return self.expression(exp.SetItem, this=this, kind=kind)

def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
    """Parses comma-separated transaction characteristics into a TRANSACTION SetItem."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem,
        expressions=characteristics,
        kind="TRANSACTION",
        # "global" is a Python keyword, so it has to be passed through a dict.
        **{"global": global_},  # type: ignore
    )

def _parse_set_item(self) -> t.Optional[exp.Expression]:
    """Parses one SET item via SET_PARSERS, falling back to a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """
    Parses a comma-separated list of SET items.

    Returns:
        A Set expression, or a generic Command if tokens remain after parsing the items.
    """
    index = self._index
    set_ = self.expression(
        exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
    """Returns a Var for the first option whose space-separated words match the next tokens."""
    for option in options:
        if self._match_text_seq(*option.split(" ")):
            return exp.var(option)
    return None

def _parse_as_command(self, start: Token) -> exp.Command:
    """
    Consumes all remaining tokens and wraps the statement text in a generic Command.

    Args:
        start: The token the command text starts at; its text becomes the command's
            `this` and the rest of the text becomes its `expression`.
    """
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parses `(<kind> [(<key> <value> ...)])` into a DictProperty named `this`."""
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_primary()

            # Stop once neither a key nor a value can be parsed.
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty,
        this=this,
        kind=kind.this if kind else None,
        settings=settings,
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """
    Parses `(MIN <min> MAX <max>)` or `(<max>)` into a DictRange named `this`.

    When MIN is not given, the minimum defaults to the number literal 0.
    """
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    # Locals are deliberately not named `min` / `max` to avoid shadowing the builtins.
    if has_min:
        min_value = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max_value = self._parse_var() or self._parse_primary()
    else:
        max_value = self._parse_var() or self._parse_primary()
        min_value = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange, this=this, min=min_value, max=max_value)

def _find_parser(
    self, parsers: t.Dict[str, t.Callable], trie: t.Dict
) -> t.Optional[t.Callable]:
    """
    Looks up the sub-parser whose (possibly multi-word) key matches the upcoming tokens.

    Args:
        parsers: Mapping from space-joined key to parser callable.
        trie: Trie built from the words of the keys in `parsers`.

    Returns:
        The matching parser, with the key's tokens consumed, or None with the token
        position restored when nothing matches.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    # Stop when the tokens run out: a key that is only partially matched at the end of
    # the input cannot be completed, and there is no current token left to inspect.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match(self, token_type, advance=True, expression=None):
    """
    Checks whether the current token has type `token_type`.

    Args:
        token_type: The token type to compare against.
        advance: Whether to consume the token on a match.
        expression: Passed to `_add_comments` on a match.

    Returns:
        True on a match, None otherwise.
    """
    if not self._curr:
        return None

    if self._curr.token_type == token_type:
        if advance:
            self._advance()
        self._add_comments(expression)
        return True

    return None

def _match_set(self, types, advance=True):
    """Returns True (consuming the token if `advance`) when the current token type is in `types`."""
    if not self._curr:
        return None

    if self._curr.token_type in types:
        if advance:
            self._advance()
        return True

    return None

def _match_pair(self, token_type_a, token_type_b, advance=True):
    """Returns True (consuming both if `advance`) when the next two tokens have the given types."""
    if not self._curr or not self._next:
        return None

    if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
        if advance:
            self._advance(2)
        return True

    return None

def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consumes an opening parenthesis, reporting "Expecting (" if it is missing."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
    """Consumes a closing parenthesis, reporting "Expecting )" if it is missing."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _match_texts(self, texts, advance=True):
    """Returns True (consuming the token if `advance`) when the upper-cased token text is in `texts`."""
    if self._curr and self._curr.text.upper() in texts:
        if advance:
            self._advance()
        return True
    return False

def _match_text_seq(self, *texts, advance=True):
    """
    Checks whether the upcoming tokens' upper-cased texts equal `texts`, in order.

    Args:
        texts: The expected texts.
        advance: Whether to keep the tokens consumed on a full match.

    Returns:
        True on a full match; False otherwise, with the token position restored.
    """
    index = self._index
    for text in texts:
        if self._curr and self._curr.text.upper() == text:
            self._advance()
        else:
            self._retreat(index)
            return False

    if not advance:
        self._retreat(index)

    return True

@t.overload
def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
    ...

@t.overload
def _replace_columns_with_dots(
    self, this: t.Optional[exp.Expression]
) -> t.Optional[exp.Expression]:
    ...
def _replace_columns_with_dots(self, this):
    """
    Rewrites Column nodes (recursively, through Dot and Column children) as Dot chains.

    Args:
        this: The node to rewrite.

    Returns:
        `this` itself when it is neither a Dot nor a Column, or when it is a Dot.
        For a Column: a Dot of its table and inner `this` when a table is set,
        otherwise just the inner `this`.
    """
    if not isinstance(this, (exp.Dot, exp.Column)):
        return this

    # Both node kinds get their children rewritten first.
    exp.replace_children(this, self._replace_columns_with_dots)

    if isinstance(this, exp.Dot):
        return this

    table = this.args.get("table")
    if table:
        return self.expression(exp.Dot, this=table, expression=this.this)
    return this.this

def _replace_lambda(
    self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
) -> t.Optional[exp.Expression]:
    """
    Replaces columns that refer to lambda variables with plain identifiers / dots.

    Args:
        node: The expression to rewrite; falsy values are returned untouched.
        lambda_variables: Names of the lambda's parameters.

    Returns:
        The rewritten node (a different object if `node` itself was such a column).
    """
    if not node:
        return node

    for column in node.find_all(exp.Column):
        if column.parts[0].name not in lambda_variables:
            continue

        replacement = column.to_dot() if column.table else column.this
        ancestor = column.parent
        replaced_dot_chain = False

        # When the column sits inside a chain of Dots, the outermost Dot is replaced.
        while isinstance(ancestor, exp.Dot):
            if not isinstance(ancestor.parent, exp.Dot):
                ancestor.replace(replacement)
                replaced_dot_chain = True
                break
            ancestor = ancestor.parent

        if replaced_dot_chain:
            continue

        if column is node:
            node = replacement
        else:
            column.replace(replacement)
    return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    """
    Stores the error-handling settings and resets the parser state.

    Args:
        error_level: The desired error level; ErrorLevel.IMMEDIATE is used when omitted.
        error_message_context: Number of characters of the query string to capture as
            context around an error.
        max_errors: Maximum number of error messages to include in a raised ParseError.
    """
    if not error_level:
        error_level = ErrorLevel.IMMEDIATE

    self.error_level = error_level
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.reset()
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Turns a token list into syntax trees, producing one tree per SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    # The statement parser is looked up on the class and passed on unbound.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that parses.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If parsing fails for every expression type (or none was supplied).
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this failure belongs to.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # `errors` is empty when no expression type was tried (e.g. an empty collection);
    # indexing it unconditionally would raise IndexError instead of the ParseError.
    cause = errors[-1] if errors else None
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """Logs the recorded errors (WARN) or raises them as one ParseError (RAISE)."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        for recorded in self.errors:
            logger.error(str(recorded))
        return

    if level == ErrorLevel.RAISE and self.errors:
        # The message is limited to `max_errors` entries; all errors are still attached.
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
Logs or raises any found errors, depending on the chosen error level setting.
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Builds a ParseError for `message` and either raises it or records it, depending on
    the chosen error level setting.

    Args:
        message: The error description.
        token: The token to report the error at. Defaults to the current token, then the
            previous one, then an empty string token.
    """
    token = token or self._curr or self._prev or Token.string("")
    context_size = self.error_message_context
    start, end = token.start, token.end + 1

    # Slice the query into the text before, at, and after the offending token.
    start_context = self.sql[max(start - context_size, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + context_size]

    formatted = (
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}"
    )
    error = ParseError.new(
        formatted,
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Instantiates an Expression, attaches comments to it and validates it.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression. When omitted
            (or empty), the instance is passed to `_add_comments` instead.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)

    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Reports every validation problem of an Expression, unless errors are being ignored.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for problem in expression.error_messages(args):
        self.raise_error(problem)

    return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.