Edit on GitHub

sqlglot.optimizer.qualify_tables

  1import itertools
  2import typing as t
  3
  4from sqlglot import alias, exp
  5from sqlglot._typing import E
  6from sqlglot.helper import csv_reader, name_sequence
  7from sqlglot.optimizer.scope import Scope, traverse_scope
  8from sqlglot.schema import Schema
  9
 10
 11def qualify_tables(
 12    expression: E,
 13    db: t.Optional[str] = None,
 14    catalog: t.Optional[str] = None,
 15    schema: t.Optional[Schema] = None,
 16) -> E:
 17    """
 18    Rewrite sqlglot AST to have fully qualified, unnested tables.
 19
 20    Examples:
 21        >>> import sqlglot
 22        >>> expression = sqlglot.parse_one("SELECT 1 FROM tbl")
 23        >>> qualify_tables(expression, db="db").sql()
 24        'SELECT 1 FROM db.tbl AS tbl'
 25        >>>
 26        >>> expression = sqlglot.parse_one("SELECT * FROM (tbl)")
 27        >>> qualify_tables(expression).sql()
 28        'SELECT * FROM tbl AS tbl'
 29        >>>
 30        >>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)")
 31        >>> qualify_tables(expression).sql()
 32        'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2'
 33
 34    Note:
 35        This rule effectively enforces a left-to-right join order, since all joins
 36        are unnested. This means that the optimizer doesn't necessarily preserve the
 37        original join order, e.g. when parentheses are used to specify it explicitly.
 38
 39    Args:
 40        expression: Expression to qualify
 41        db: Database name
 42        catalog: Catalog name
 43        schema: A schema to populate
 44
 45    Returns:
 46        The qualified expression.
 47    """
 48    next_alias_name = name_sequence("_q_")
 49
 50    for scope in traverse_scope(expression):
 51        for derived_table in itertools.chain(scope.ctes, scope.derived_tables):
 52            if not derived_table.args.get("alias"):
 53                alias_ = next_alias_name()
 54                derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_)))
 55                scope.rename_source(None, alias_)
 56
 57            pivots = derived_table.args.get("pivots")
 58            if pivots and not pivots[0].alias:
 59                pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name())))
 60
 61        for name, source in scope.sources.items():
 62            if isinstance(source, exp.Table):
 63                if isinstance(source.this, exp.Identifier):
 64                    if not source.args.get("db"):
 65                        source.set("db", exp.to_identifier(db))
 66                    if not source.args.get("catalog"):
 67                        source.set("catalog", exp.to_identifier(catalog))
 68
 69                # Unnest joins attached in tables by appending them to the closest query
 70                for join in source.args.get("joins") or []:
 71                    scope.expression.append("joins", join)
 72
 73                source.set("joins", None)
 74                source.set("wrapped", None)
 75
 76                if not source.alias:
 77                    source = source.replace(
 78                        alias(
 79                            source, name or source.name or next_alias_name(), copy=True, table=True
 80                        )
 81                    )
 82
 83                pivots = source.args.get("pivots")
 84                if pivots and not pivots[0].alias:
 85                    pivots[0].set(
 86                        "alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))
 87                    )
 88
 89                if schema and isinstance(source.this, exp.ReadCSV):
 90                    with csv_reader(source.this) as reader:
 91                        header = next(reader)
 92                        columns = next(reader)
 93                        schema.add_table(
 94                            source, {k: type(v).__name__ for k, v in zip(header, columns)}
 95                        )
 96            elif isinstance(source, Scope) and source.is_udtf:
 97                udtf = source.expression
 98                table_alias = udtf.args.get("alias") or exp.TableAlias(
 99                    this=exp.to_identifier(next_alias_name())
100                )
101                udtf.set("alias", table_alias)
102
103                if not table_alias.name:
104                    table_alias.set("this", exp.to_identifier(next_alias_name()))
105                if isinstance(udtf, exp.Values) and not table_alias.columns:
106                    for i, e in enumerate(udtf.expressions[0].expressions):
107                        table_alias.append("columns", exp.to_identifier(f"_col_{i}"))
108
109    return expression
def qualify_tables( expression: ~E, db: Optional[str] = None, catalog: Optional[str] = None, schema: Optional[sqlglot.schema.Schema] = None) -> ~E:
 12def qualify_tables(
 13    expression: E,
 14    db: t.Optional[str] = None,
 15    catalog: t.Optional[str] = None,
 16    schema: t.Optional[Schema] = None,
 17) -> E:
 18    """
 19    Rewrite sqlglot AST to have fully qualified, unnested tables.
 20
 21    Examples:
 22        >>> import sqlglot
 23        >>> expression = sqlglot.parse_one("SELECT 1 FROM tbl")
 24        >>> qualify_tables(expression, db="db").sql()
 25        'SELECT 1 FROM db.tbl AS tbl'
 26        >>>
 27        >>> expression = sqlglot.parse_one("SELECT * FROM (tbl)")
 28        >>> qualify_tables(expression).sql()
 29        'SELECT * FROM tbl AS tbl'
 30        >>>
 31        >>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)")
 32        >>> qualify_tables(expression).sql()
 33        'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2'
 34
 35    Note:
 36        This rule effectively enforces a left-to-right join order, since all joins
 37        are unnested. This means that the optimizer doesn't necessarily preserve the
 38        original join order, e.g. when parentheses are used to specify it explicitly.
 39
 40    Args:
 41        expression: Expression to qualify
 42        db: Database name
 43        catalog: Catalog name
 44        schema: A schema to populate
 45
 46    Returns:
 47        The qualified expression.
 48    """
 49    next_alias_name = name_sequence("_q_")
 50
 51    for scope in traverse_scope(expression):
 52        for derived_table in itertools.chain(scope.ctes, scope.derived_tables):
 53            if not derived_table.args.get("alias"):
 54                alias_ = next_alias_name()
 55                derived_table.set("alias", exp.TableAlias(this=exp.to_identifier(alias_)))
 56                scope.rename_source(None, alias_)
 57
 58            pivots = derived_table.args.get("pivots")
 59            if pivots and not pivots[0].alias:
 60                pivots[0].set("alias", exp.TableAlias(this=exp.to_identifier(next_alias_name())))
 61
 62        for name, source in scope.sources.items():
 63            if isinstance(source, exp.Table):
 64                if isinstance(source.this, exp.Identifier):
 65                    if not source.args.get("db"):
 66                        source.set("db", exp.to_identifier(db))
 67                    if not source.args.get("catalog"):
 68                        source.set("catalog", exp.to_identifier(catalog))
 69
 70                # Unnest joins attached in tables by appending them to the closest query
 71                for join in source.args.get("joins") or []:
 72                    scope.expression.append("joins", join)
 73
 74                source.set("joins", None)
 75                source.set("wrapped", None)
 76
 77                if not source.alias:
 78                    source = source.replace(
 79                        alias(
 80                            source, name or source.name or next_alias_name(), copy=True, table=True
 81                        )
 82                    )
 83
 84                pivots = source.args.get("pivots")
 85                if pivots and not pivots[0].alias:
 86                    pivots[0].set(
 87                        "alias", exp.TableAlias(this=exp.to_identifier(next_alias_name()))
 88                    )
 89
 90                if schema and isinstance(source.this, exp.ReadCSV):
 91                    with csv_reader(source.this) as reader:
 92                        header = next(reader)
 93                        columns = next(reader)
 94                        schema.add_table(
 95                            source, {k: type(v).__name__ for k, v in zip(header, columns)}
 96                        )
 97            elif isinstance(source, Scope) and source.is_udtf:
 98                udtf = source.expression
 99                table_alias = udtf.args.get("alias") or exp.TableAlias(
100                    this=exp.to_identifier(next_alias_name())
101                )
102                udtf.set("alias", table_alias)
103
104                if not table_alias.name:
105                    table_alias.set("this", exp.to_identifier(next_alias_name()))
106                if isinstance(udtf, exp.Values) and not table_alias.columns:
107                    for i, e in enumerate(udtf.expressions[0].expressions):
108                        table_alias.append("columns", exp.to_identifier(f"_col_{i}"))
109
110    return expression

Rewrite sqlglot AST to have fully qualified, unnested tables.

Examples:
>>> import sqlglot
>>> expression = sqlglot.parse_one("SELECT 1 FROM tbl")
>>> qualify_tables(expression, db="db").sql()
'SELECT 1 FROM db.tbl AS tbl'
>>>
>>> expression = sqlglot.parse_one("SELECT * FROM (tbl)")
>>> qualify_tables(expression).sql()
'SELECT * FROM tbl AS tbl'
>>>
>>> expression = sqlglot.parse_one("SELECT * FROM (tbl1 JOIN tbl2 ON id1 = id2)")
>>> qualify_tables(expression).sql()
'SELECT * FROM tbl1 AS tbl1 JOIN tbl2 AS tbl2 ON id1 = id2'
Note:

This rule effectively enforces a left-to-right join order, since all joins are unnested. This means that the optimizer doesn't necessarily preserve the original join order, e.g. when parentheses are used to specify it explicitly.

Arguments:
  • expression: Expression to qualify
  • db: Database name
  • catalog: Catalog name
  • schema: A schema to populate
Returns:

The qualified expression.