sqlglot.optimizer.qualify_tables
1import itertools 2import typing as t 3 4from sqlglot import alias, exp 5from sqlglot._typing import E 6from sqlglot.helper import csv_reader, name_sequence 7from sqlglot.optimizer.scope import Scope, traverse_scope 8from sqlglot.schema import Schema 9 10 11def qualify_tables( 12 expression: E, 13 db: t.Optional[str] = None, 14 catalog: t.Optional[str] = None, 15 schema: t.Optional[Schema] = None, 16) -> E: 17 """ 18 Rewrite sqlglot AST to have fully qualified, unnested tables. 19 20 Examples: 21 >>> import sqlglot 22 >>> expression = sqlglot.parse_one("SELECT 1 FROM tbl") 23 >>> qualify_tables(expression, db="db").sql() 24 'SELECT 1 FROM db.tbl AS tbl' 25 >>> 26 >>> expression = sqlglot.parse_one("SELECT * FROM (tbl)") 27 >>> qualify_tables(expression).sql() 28 'SELECT * FROM tbl AS tbl' 29 >>> 30 >>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)") 31 >>> qualify_tables(expression).sql() 32 'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2' 33 34 Note: 35 This rule effectively enforces a left-to-right join order, since all joins 36 are unnested. This means that the optimizer doesn't necessarily preserve the 37 original join order, e.g. when parentheses are used to specify it explicitly. 38 39 Args: 40 expression: Expression to qualify 41 db: Database name 42 catalog: Catalog name 43 schema: A schema to populate 44 45 Returns: 46 The qualified expression. 47 """ 48 next_alias_name = name_sequence("_q_") 49 50 for scope in traverse_scope(expression): 51 for derived_table in itertools.chain(scope.ctes, scope.derived_tables): 52 if not derived_table.args.get("alias"): 53 alias_ = next_alias_name() 54 derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_))) 55 scope.rename_source(None, alias_) 56 57 pivots = derived_table.args.get("pivots") 58 if pivots and not pivots[0].alias: 59 pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))) 60 61 for name, source in scope.sources.items(): 62 if isinstance(source, exp.Table): 63 if isinstance(source.this, exp.Identifier): 64 if not source.args.get("db"): 65 source.set("db", exp.to_identifier(db)) 66 if not source.args.get("catalog"): 67 source.set("catalog", exp.to_identifier(catalog)) 68 69 # Unnest joins attached in tables by appending them to the closest query 70 for join in source.args.get("joins") or []: 71 scope.expression.append("joins", join) 72 73 source.set("joins", None) 74 source.set("wrapped", None) 75 76 if not source.alias: 77 source = source.replace( 78 alias( 79 source, name or source.name or next_alias_name(), copy=True, table=True 80 ) 81 ) 82 83 pivots = source.args.get("pivots") 84 if pivots and not pivots[0].alias: 85 pivots[0].set( 86 "alias", exp.TableAlias(this=exp.to_identifier(next_alias_name())) 87 ) 88 89 if schema and isinstance(source.this, exp.ReadCSV): 90 with csv_reader(source.this) as reader: 91 header = next(reader) 92 columns = next(reader) 93 schema.add_table( 94 source, {k: type(v).__name__ for k, v in zip(header, columns)} 95 ) 96 elif isinstance(source, Scope) and source.is_udtf: 97 udtf = source.expression 98 table_alias = udtf.args.get("alias") or exp.TableAlias( 99 this=exp.to_identifier(next_alias_name()) 100 ) 101 udtf.set("alias", table_alias) 102 103 if not table_alias.name: 104 table_alias.set("this", exp.to_identifier(next_alias_name())) 105 if isinstance(udtf, exp.Values) and not table_alias.columns: 106 for i, e in enumerate(udtf.expressions[0].expressions): 107 table_alias.append("columns", exp.to_identifier(f"_col_{i}")) 108 109 return expression
def
qualify_tables( expression: ~E, db: Optional[str] = None, catalog: Optional[str] = None, schema: Optional[sqlglot.schema.Schema] = None) -> ~E:
12def qualify_tables( 13 expression: E, 14 db: t.Optional[str] = None, 15 catalog: t.Optional[str] = None, 16 schema: t.Optional[Schema] = None, 17) -> E: 18 """ 19 Rewrite sqlglot AST to have fully qualified, unnested tables. 20 21 Examples: 22 >>> import sqlglot 23 >>> expression = sqlglot.parse_one("SELECT 1 FROM tbl") 24 >>> qualify_tables(expression, db="db").sql() 25 'SELECT 1 FROM db.tbl AS tbl' 26 >>> 27 >>> expression = sqlglot.parse_one("SELECT * FROM (tbl)") 28 >>> qualify_tables(expression).sql() 29 'SELECT * FROM tbl AS tbl' 30 >>> 31 >>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)") 32 >>> qualify_tables(expression).sql() 33 'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2' 34 35 Note: 36 This rule effectively enforces a left-to-right join order, since all joins 37 are unnested. This means that the optimizer doesn't necessarily preserve the 38 original join order, e.g. when parentheses are used to specify it explicitly. 39 40 Args: 41 expression: Expression to qualify 42 db: Database name 43 catalog: Catalog name 44 schema: A schema to populate 45 46 Returns: 47 The qualified expression. 48 """ 49 next_alias_name = name_sequence("_q_") 50 51 for scope in traverse_scope(expression): 52 for derived_table in itertools.chain(scope.ctes, scope.derived_tables): 53 if not derived_table.args.get("alias"): 54 alias_ = next_alias_name() 55 derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_))) 56 scope.rename_source(None, alias_) 57 58 pivots = derived_table.args.get("pivots") 59 if pivots and not pivots[0].alias: 60 pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))) 61 62 for name, source in scope.sources.items(): 63 if isinstance(source, exp.Table): 64 if isinstance(source.this, exp.Identifier): 65 if not source.args.get("db"): 66 source.set("db", exp.to_identifier(db)) 67 if not source.args.get("catalog"): 68 source.set("catalog", exp.to_identifier(catalog)) 69 70 # Unnest joins attached in tables by appending them to the closest query 71 for join in source.args.get("joins") or []: 72 scope.expression.append("joins", join) 73 74 source.set("joins", None) 75 source.set("wrapped", None) 76 77 if not source.alias: 78 source = source.replace( 79 alias( 80 source, name or source.name or next_alias_name(), copy=True, table=True 81 ) 82 ) 83 84 pivots = source.args.get("pivots") 85 if pivots and not pivots[0].alias: 86 pivots[0].set( 87 "alias", exp.TableAlias(this=exp.to_identifier(next_alias_name())) 88 ) 89 90 if schema and isinstance(source.this, exp.ReadCSV): 91 with csv_reader(source.this) as reader: 92 header = next(reader) 93 columns = next(reader) 94 schema.add_table( 95 source, {k: type(v).__name__ for k, v in zip(header, columns)} 96 ) 97 elif isinstance(source, Scope) and source.is_udtf: 98 udtf = source.expression 99 table_alias = udtf.args.get("alias") or exp.TableAlias( 100 this=exp.to_identifier(next_alias_name()) 101 ) 102 udtf.set("alias", table_alias) 103 104 if not table_alias.name: 105 table_alias.set("this", exp.to_identifier(next_alias_name())) 106 if isinstance(udtf, exp.Values) and not table_alias.columns: 107 for i, e in enumerate(udtf.expressions[0].expressions): 108 table_alias.append("columns", exp.to_identifier(f"_col_{i}")) 109 110 return expression
Rewrite sqlglot AST to have fully qualified, unnested tables.
Examples:
>>> import sqlglot >>> expression = sqlglot.parse_one("SELECT 1 FROM tbl") >>> qualify_tables(expression, db="db").sql() 'SELECT 1 FROM db.tbl AS tbl' >>> >>> expression = sqlglot.parse_one("SELECT * FROM (tbl)") >>> qualify_tables(expression).sql() 'SELECT * FROM tbl AS tbl' >>> >>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)") >>> qualify_tables(expression).sql() 'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2'
Note:
This rule effectively enforces a left-to-right join order, since all joins are unnested. This means that the optimizer doesn't necessarily preserve the original join order, e.g. when parentheses are used to specify it explicitly.
Arguments:
- expression: Expression to qualify
- db: Database name
- catalog: Catalog name
- schema: A schema to populate
Returns:
The qualified expression.