sqlglot.dialects.duckdb
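A quick usage sketch before the source: `sqlglot.transpile` and `sqlglot.parse_one` are the library's public entry points; the queries themselves are illustrative.

    import sqlglot

    # Round-trip a DuckDB query through this dialect
    print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="duckdb")[0])

    # Inspect the parsed tree
    print(repr(sqlglot.parse_one("SELECT EPOCH_MS(1618088028295)", read="duckdb")))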
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    Version,
    approx_count_distinct_sql,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    count_if_to_sum,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    remove_from_array_using_filter,
    strposition_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    expr = expression.expression
    interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit)

    return f"{self.sql(this)} {op} {self.sql(interval)}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the two dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)
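
# A sketch of how the sort helpers above round-trip (the example query is
# illustrative):
#
#   import sqlglot
#   # ARRAY_REVERSE_SORT parses into exp.SortArray(asc=false) and is expected
#   # to render back unchanged via _sort_array_sql
#   sqlglot.transpile("SELECT ARRAY_REVERSE_SORT([3, 1, 2])", read="duckdb", write="duckdb")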
"ARRAY_SORT" 112 return self.func(name, expression.this) 113 114 115def _build_sort_array_desc(args: t.List) -> exp.Expression: 116 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 117 118 119def _build_date_diff(args: t.List) -> exp.Expression: 120 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 121 122 123def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 124 def _builder(args: t.List) -> exp.GenerateSeries: 125 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 126 if len(args) == 1: 127 # DuckDB uses 0 as a default for the series' start when it's omitted 128 args.insert(0, exp.Literal.number("0")) 129 130 gen_series = exp.GenerateSeries.from_arg_list(args) 131 gen_series.set("is_end_exclusive", end_exclusive) 132 133 return gen_series 134 135 return _builder 136 137 138def _build_make_timestamp(args: t.List) -> exp.Expression: 139 if len(args) == 1: 140 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 141 142 return exp.TimestampFromParts( 143 year=seq_get(args, 0), 144 month=seq_get(args, 1), 145 day=seq_get(args, 2), 146 hour=seq_get(args, 3), 147 min=seq_get(args, 4), 148 sec=seq_get(args, 5), 149 ) 150 151 152def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]: 153 def _parse(self: DuckDB.Parser) -> exp.Show: 154 return self._parse_show_duckdb(*args, **kwargs) 155 156 return _parse 157 158 159def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 160 args: t.List[str] = [] 161 162 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 163 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 164 # The transformation to ROW will take place if: 165 # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would 166 # 2. 


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)
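
# Sketch: because of _implicit_datetime_cast, string literals fed to DATE_DIFF
# should come back wrapped in CASTs (the rendered output shown is an assumption):
#
#   import sqlglot
#   sqlglot.parse_one(
#       "SELECT DATE_DIFF('day', '2020-01-01', '2020-02-01')", read="duckdb"
#   ).sql("duckdb")
#   # roughly: SELECT DATE_DIFF('day', CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE))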


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))
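
# Sketch: BigQuery's GENERATE_DATE_ARRAY maps onto GENERATE_SERIES plus a cast
# back to DATE[], per _generate_datetime_array_sql above (the query is illustrative):
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT GENERATE_DATE_ARRAY('2020-01-01', '2020-01-05')",
#       read="bigquery",
#       write="duckdb",
#   )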


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = True
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "DAYOFWEEKISO": "ISODOW",
    }
    DATE_PART_MAPPING.pop("WEEKDAY")

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        BYTE_STRINGS = [("e'", "'"), ("E'", "'")]
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.ATTACH,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "DATETIME": TokenType.TIMESTAMPNTZ,
            "DETACH": TokenType.DETACH,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP": TokenType.TIMESTAMPNTZ,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        SHOW_PARSERS = {
            "TABLES": _show_parser("TABLES"),
            "ALL TABLES": _show_parser("ALL TABLES"),
        }

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "GENERATE_SERIES": _build_generate_series(),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
415 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 416 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 417 "RANGE": _build_generate_series(end_exclusive=True), 418 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 419 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 420 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 421 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 422 this=seq_get(args, 0), 423 expression=seq_get(args, 1), 424 replacement=seq_get(args, 2), 425 modifiers=seq_get(args, 3), 426 ), 427 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 428 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 429 "STRING_SPLIT": exp.Split.from_arg_list, 430 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 431 "STRING_TO_ARRAY": exp.Split.from_arg_list, 432 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 433 "STRUCT_PACK": exp.Struct.from_arg_list, 434 "STR_SPLIT": exp.Split.from_arg_list, 435 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 436 "TIME_BUCKET": exp.DateBin.from_arg_list, 437 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 438 "UNNEST": exp.Explode.from_arg_list, 439 "XOR": binary_from_function(exp.BitwiseXor), 440 } 441 442 FUNCTIONS.pop("DATE_SUB") 443 FUNCTIONS.pop("GLOB") 444 445 FUNCTION_PARSERS = { 446 **parser.Parser.FUNCTION_PARSERS, 447 **dict.fromkeys( 448 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 449 ), 450 } 451 FUNCTION_PARSERS.pop("DECODE") 452 453 NO_PAREN_FUNCTION_PARSERS = { 454 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 455 "MAP": lambda self: self._parse_map(), 456 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 457 } 458 459 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 460 TokenType.SEMI, 461 TokenType.ANTI, 462 } 463 464 PLACEHOLDER_PARSERS = { 465 **parser.Parser.PLACEHOLDER_PARSERS, 466 TokenType.PARAMETER: lambda self: ( 467 self.expression(exp.Placeholder, this=self._prev.text) 468 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 469 else None 470 ), 471 } 472 473 TYPE_CONVERTERS = { 474 # https://duckdb.org/docs/sql/data_types/numeric 475 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 476 # https://duckdb.org/docs/sql/data_types/text 477 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 478 } 479 480 STATEMENT_PARSERS = { 481 **parser.Parser.STATEMENT_PARSERS, 482 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 483 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 484 TokenType.SHOW: lambda self: self._parse_show(), 485 } 486 487 def _parse_expression(self) -> t.Optional[exp.Expression]: 488 # DuckDB supports prefix aliases, e.g. 

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. foo: 1
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_id_var(tokens=self.ALIAS_TOKENS)
                self._match(TokenType.COLON)
                comments = self._prev_comments or []

                this = self._parse_assignment()
                if isinstance(this, exp.Expression):
                    # Moves the comment next to the alias in `alias: expr /* comment */`
                    comments += this.pop_comments() or []

                return self.expression(exp.Alias, comments=comments, this=this, alias=alias)

            return super()._parse_expression()

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            # DuckDB supports prefix aliases, e.g. FROM foo: bar
            if self._next and self._next.token_type == TokenType.COLON:
                alias = self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                )
                self._match(TokenType.COLON)
                comments = self._prev_comments or []
            else:
                alias = None
                comments = []

            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias):
                # Moves the comment next to the alias in `alias: table /* comment */`
                comments += table.pop_comments() or []
                alias.comments = alias.pop_comments() + comments
                table.set("alias", alias)

            return table

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket):
                # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")
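
        # Sketch: prefix aliases normalize to ordinary AS aliases (illustrative;
        # the exact rendering shown is an assumption):
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT foo: 1 FROM t: tbl", read="duckdb").sql("duckdb")
        #   # roughly: "SELECT 1 AS foo FROM tbl AS t"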

        def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach:
            def _parse_attach_option() -> exp.AttachOption:
                return self.expression(
                    exp.AttachOption,
                    this=self._parse_var(any_token=True),
                    expression=self._parse_field(any_token=True),
                )

            self._match(TokenType.DATABASE)
            exists = self._parse_exists(not_=is_attach)
            this = self._parse_alias(self._parse_primary_or_var(), explicit=True)

            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_csv(_parse_attach_option)
            else:
                expressions = None

            return (
                self.expression(exp.Attach, this=this, exists=exists, expressions=expressions)
                if is_attach
                else self.expression(exp.Detach, this=this, exists=exists)
            )

        def _parse_show_duckdb(self, this: str) -> exp.Show:
            return self.expression(exp.Show, this=this)

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        SUPPORTS_WINDOW_EXCLUDE = True
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False
        NORMALIZE_EXTRACT_DATE_PARTS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArrayRemove: remove_from_array_using_filter,
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
rename_func("JSON_EXISTS"), 674 exp.JSONExtract: _arrow_json_extract_sql, 675 exp.JSONExtractArray: _json_extract_value_array_sql, 676 exp.JSONExtractScalar: _arrow_json_extract_sql, 677 exp.JSONFormat: _json_format_sql, 678 exp.JSONValueArray: _json_extract_value_array_sql, 679 exp.Lateral: explode_to_unnest_sql, 680 exp.LogicalOr: rename_func("BOOL_OR"), 681 exp.LogicalAnd: rename_func("BOOL_AND"), 682 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 683 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 684 exp.MonthsBetween: lambda self, e: self.func( 685 "DATEDIFF", 686 "'month'", 687 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 688 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 689 ), 690 exp.PercentileCont: rename_func("QUANTILE_CONT"), 691 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 692 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 693 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 694 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 695 exp.RegexpReplace: lambda self, e: self.func( 696 "REGEXP_REPLACE", 697 e.this, 698 e.expression, 699 e.args.get("replacement"), 700 e.args.get("modifiers"), 701 ), 702 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 703 exp.RegexpILike: lambda self, e: self.func( 704 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 705 ), 706 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 707 exp.Return: lambda self, e: self.sql(e, "this"), 708 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 709 exp.Rand: rename_func("RANDOM"), 710 exp.SHA: rename_func("SHA1"), 711 exp.SHA2: sha256_sql, 712 exp.Split: rename_func("STR_SPLIT"), 713 exp.SortArray: _sort_array_sql, 714 exp.StrPosition: strposition_sql, 715 exp.StrToUnix: lambda self, e: self.func( 716 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 717 ), 718 exp.Struct: _struct_sql, 719 exp.Transform: rename_func("LIST_TRANSFORM"), 720 exp.TimeAdd: _date_delta_sql, 721 exp.Time: no_time_sql, 722 exp.TimeDiff: _timediff_sql, 723 exp.Timestamp: no_timestamp_sql, 724 exp.TimestampDiff: lambda self, e: self.func( 725 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 726 ), 727 exp.TimestampTrunc: timestamptrunc_sql(), 728 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 729 exp.TimeStrToTime: timestrtotime_sql, 730 exp.TimeStrToUnix: lambda self, e: self.func( 731 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 732 ), 733 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 734 exp.TimeToUnix: rename_func("EPOCH"), 735 exp.TsOrDiToDi: lambda self, 736 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 737 exp.TsOrDsAdd: _date_delta_sql, 738 exp.TsOrDsDiff: lambda self, e: self.func( 739 "DATE_DIFF", 740 f"'{e.args.get('unit') or 'DAY'}'", 741 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 742 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 743 ), 744 exp.UnixToStr: lambda self, e: self.func( 745 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 746 ), 747 exp.DatetimeTrunc: lambda self, e: self.func( 748 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 749 ), 750 exp.UnixToTime: _unix_to_time_sql, 751 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 
'this')}) AS TEXT)", 752 exp.VariancePop: rename_func("VAR_POP"), 753 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 754 exp.Xor: bool_xor_sql, 755 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 756 rename_func("LEVENSHTEIN") 757 ), 758 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 759 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 760 exp.DateBin: rename_func("TIME_BUCKET"), 761 } 762 763 SUPPORTED_JSON_PATH_PARTS = { 764 exp.JSONPathKey, 765 exp.JSONPathRoot, 766 exp.JSONPathSubscript, 767 exp.JSONPathWildcard, 768 } 769 770 TYPE_MAPPING = { 771 **generator.Generator.TYPE_MAPPING, 772 exp.DataType.Type.BINARY: "BLOB", 773 exp.DataType.Type.BPCHAR: "TEXT", 774 exp.DataType.Type.CHAR: "TEXT", 775 exp.DataType.Type.DATETIME: "TIMESTAMP", 776 exp.DataType.Type.FLOAT: "REAL", 777 exp.DataType.Type.JSONB: "JSON", 778 exp.DataType.Type.NCHAR: "TEXT", 779 exp.DataType.Type.NVARCHAR: "TEXT", 780 exp.DataType.Type.UINT: "UINTEGER", 781 exp.DataType.Type.VARBINARY: "BLOB", 782 exp.DataType.Type.ROWVERSION: "BLOB", 783 exp.DataType.Type.VARCHAR: "TEXT", 784 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 785 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 786 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 787 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 788 } 789 790 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 791 RESERVED_KEYWORDS = { 792 "array", 793 "analyse", 794 "union", 795 "all", 796 "when", 797 "in_p", 798 "default", 799 "create_p", 800 "window", 801 "asymmetric", 802 "to", 803 "else", 804 "localtime", 805 "from", 806 "end_p", 807 "select", 808 "current_date", 809 "foreign", 810 "with", 811 "grant", 812 "session_user", 813 "or", 814 "except", 815 "references", 816 "fetch", 817 "limit", 818 "group_p", 819 "leading", 820 "into", 821 "collate", 822 "offset", 823 "do", 824 "then", 825 "localtimestamp", 826 "check_p", 827 "lateral_p", 828 "current_role", 829 "where", 830 "asc_p", 831 "placing", 832 "desc_p", 833 "user", 834 "unique", 835 "initially", 836 "column", 837 "both", 838 "some", 839 "as", 840 "any", 841 "only", 842 "deferrable", 843 "null_p", 844 "current_time", 845 "true_p", 846 "table", 847 "case", 848 "trailing", 849 "variadic", 850 "for", 851 "on", 852 "distinct", 853 "false_p", 854 "not", 855 "constraint", 856 "current_timestamp", 857 "returning", 858 "primary", 859 "intersect", 860 "having", 861 "analyze", 862 "current_user", 863 "and", 864 "cast", 865 "symmetric", 866 "using", 867 "order", 868 "current_catalog", 869 } 870 871 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 872 873 # DuckDB doesn't generally support CREATE TABLE .. properties 874 # https://duckdb.org/docs/sql/statements/create_table.html 875 PROPERTIES_LOCATION = { 876 prop: exp.Properties.Location.UNSUPPORTED 877 for prop in generator.Generator.PROPERTIES_LOCATION 878 } 879 880 # There are a few exceptions (e.g. 

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = (
            exp.FirstValue,
            exp.Lag,
            exp.LastValue,
            exp.Lead,
            exp.NthValue,
        )

        def show_sql(self, expression: exp.Show) -> str:
            return f"SHOW {expression.name}"

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)
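
        # Sketch: the "safe" variants above lean on TRY_STRPTIME (illustrative; the
        # BigQuery SAFE. prefix is assumed to set the `safe` arg on the parsed node):
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT SAFE.PARSE_DATE('%Y-%m-%d', x)", read="bigquery", write="duckdb")
        #   # expected to produce a CAST(TRY_STRPTIME(...) AS DATE) expression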

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def countif_sql(self, expression: exp.CountIf) -> str:
            if self.dialect.version >= Version("1.2"):
                return self.function_fallback_sql(expression)

            # https://github.com/tobymao/sqlglot/pull/4749
            return count_if_to_sum(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if self.dialect.version >= Version("1.2"):
                return super().bracket_sql(expression)

            # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this, dialect=self.dialect)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
                .else_(
                    exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it, since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)
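
        # Sketch: when the parser marks a BigQuery UNNEST with `explode_array`, the
        # method above rewrites it into a subquery with max_depth => 2 (the query
        # is illustrative):
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT * FROM UNNEST([STRUCT(1 AS a), STRUCT(2 AS a)])",
        #       read="bigquery",
        #       write="duckdb",
        #   )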

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            self.unsupported("IGNORE NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
            if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
                # DuckDB should render RESPECT NULLS only for the general-purpose
                # window functions that accept it, e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
                return super().respectnulls_sql(expression)

            self.unsupported("RESPECT NULLS is not supported for non-window functions.")
            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )

        @unsupported_args("culture")
        def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
            fmt = expression.args.get("format")
            if fmt and fmt.is_int:
                return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

            self.unsupported("Only integer formats are supported by NumberToStr")
            return self.function_fallback_sql(expression)

        def autoincrementcolumnconstraint_sql(self, _) -> str:
            self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
            return ""

        def aliases_sql(self, expression: exp.Aliases) -> str:
            this = expression.this
            if isinstance(this, exp.Posexplode):
                return self.posexplode_sql(this)

            return super().aliases_sql(expression)

        def posexplode_sql(self, expression: exp.Posexplode) -> str:
            this = expression.this
            parent = expression.parent

            # The default Spark aliases are "pos" and "col", unless specified otherwise
            pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

            if isinstance(parent, exp.Aliases):
                # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
                pos, col = parent.expressions
            elif isinstance(parent, exp.Table):
                # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
                alias = parent.args.get("alias")
                if alias:
                    pos, col = alias.columns or [pos, col]
                    alias.pop()

            # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
            # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
            unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
            gen_subscripts = self.sql(
                exp.Alias(
                    this=exp.Anonymous(
                        this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
                    )
                    - exp.Literal.number(1),
                    alias=pos,
                )
            )

            posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

            if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
                # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
                return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

            return posexplode_sql
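
# Sketch of the posexplode translation above (illustrative; the output shape is
# an assumption based on posexplode_sql):
#
#   import sqlglot
#   sqlglot.transpile("SELECT POSEXPLODE(col) FROM t", read="spark", write="duckdb")
#   # expected shape: GENERATE_SUBSCRIPTS(col, 1) - 1 AS pos, UNNEST(col) AS col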
temporary tables) which are supported or 882 # can be transpiled to DuckDB, so we explicitly override them accordingly 883 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 884 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 885 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 886 887 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 888 exp.FirstValue, 889 exp.Lag, 890 exp.LastValue, 891 exp.Lead, 892 exp.NthValue, 893 ) 894 895 def show_sql(self, expression: exp.Show) -> str: 896 return f"SHOW {expression.name}" 897 898 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 899 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 900 901 def strtotime_sql(self, expression: exp.StrToTime) -> str: 902 if expression.args.get("safe"): 903 formatted_time = self.format_time(expression) 904 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 905 return str_to_time_sql(self, expression) 906 907 def strtodate_sql(self, expression: exp.StrToDate) -> str: 908 if expression.args.get("safe"): 909 formatted_time = self.format_time(expression) 910 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 911 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 912 913 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 914 arg = expression.this 915 if expression.args.get("safe"): 916 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 917 return self.func("JSON", arg) 918 919 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 920 nano = expression.args.get("nano") 921 if nano is not None: 922 expression.set( 923 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 924 ) 925 926 return rename_func("MAKE_TIME")(self, expression) 927 928 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 929 sec = expression.args["sec"] 930 931 milli = expression.args.get("milli") 932 if milli is not None: 933 sec += milli.pop() / exp.Literal.number(1000.0) 934 935 nano = expression.args.get("nano") 936 if nano is not None: 937 sec += nano.pop() / exp.Literal.number(1000000000.0) 938 939 if milli or nano: 940 expression.set("sec", sec) 941 942 return rename_func("MAKE_TIMESTAMP")(self, expression) 943 944 def tablesample_sql( 945 self, 946 expression: exp.TableSample, 947 tablesample_keyword: t.Optional[str] = None, 948 ) -> str: 949 if not isinstance(expression.parent, exp.Select): 950 # This sample clause only applies to a single source, not the entire resulting relation 951 tablesample_keyword = "TABLESAMPLE" 952 953 if expression.args.get("size"): 954 method = expression.args.get("method") 955 if method and method.name.upper() != "RESERVOIR": 956 self.unsupported( 957 f"Sampling method {method} is not supported with a discrete sample count, " 958 "defaulting to reservoir sampling" 959 ) 960 expression.set("method", exp.var("RESERVOIR")) 961 962 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 963 964 def interval_sql(self, expression: exp.Interval) -> str: 965 multiplier: t.Optional[int] = None 966 unit = expression.text("unit").lower() 967 968 if unit.startswith("week"): 969 multiplier = 7 970 if unit.startswith("quarter"): 971 multiplier = 90 972 973 if multiplier: 974 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 975 976 
return super().interval_sql(expression) 977 978 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 979 if isinstance(expression.parent, exp.UserDefinedFunction): 980 return self.sql(expression, "this") 981 return super().columndef_sql(expression, sep) 982 983 def join_sql(self, expression: exp.Join) -> str: 984 if ( 985 expression.side == "LEFT" 986 and not expression.args.get("on") 987 and isinstance(expression.this, exp.Unnest) 988 ): 989 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 990 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 991 return super().join_sql(expression.on(exp.true())) 992 993 return super().join_sql(expression) 994 995 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 996 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 997 if expression.args.get("is_end_exclusive"): 998 return rename_func("RANGE")(self, expression) 999 1000 return self.function_fallback_sql(expression) 1001 1002 def countif_sql(self, expression: exp.CountIf) -> str: 1003 if self.dialect.version >= Version("1.2"): 1004 return self.function_fallback_sql(expression) 1005 1006 # https://github.com/tobymao/sqlglot/pull/4749 1007 return count_if_to_sum(self, expression) 1008 1009 def bracket_sql(self, expression: exp.Bracket) -> str: 1010 if self.dialect.version >= Version("1.2"): 1011 return super().bracket_sql(expression) 1012 1013 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1014 this = expression.this 1015 if isinstance(this, exp.Array): 1016 this.replace(exp.paren(this)) 1017 1018 bracket = super().bracket_sql(expression) 1019 1020 if not expression.args.get("returns_list_for_maps"): 1021 if not this.type: 1022 from sqlglot.optimizer.annotate_types import annotate_types 1023 1024 this = annotate_types(this, dialect=self.dialect) 1025 1026 if this.is_type(exp.DataType.Type.MAP): 1027 bracket = f"({bracket})[1]" 1028 1029 return bracket 1030 1031 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1032 expression_sql = self.sql(expression, "expression") 1033 1034 func = expression.this 1035 if isinstance(func, exp.PERCENTILES): 1036 # Make the order key the first arg and slide the fraction to the right 1037 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1038 order_col = expression.find(exp.Ordered) 1039 if order_col: 1040 func.set("expression", func.this) 1041 func.set("this", order_col.this) 1042 1043 this = self.sql(expression, "this").rstrip(")") 1044 1045 return f"{this}{expression_sql})" 1046 1047 def length_sql(self, expression: exp.Length) -> str: 1048 arg = expression.this 1049 1050 # Dialects like BQ and Snowflake also accept binary values as args, so 1051 # DDB will attempt to infer the type or resort to case/when resolution 1052 if not expression.args.get("binary") or arg.is_string: 1053 return self.func("LENGTH", arg) 1054 1055 if not arg.type: 1056 from sqlglot.optimizer.annotate_types import annotate_types 1057 1058 arg = annotate_types(arg, dialect=self.dialect) 1059 1060 if arg.is_type(*exp.DataType.TEXT_TYPES): 1061 return self.func("LENGTH", arg) 1062 1063 # We need these casts to make duckdb's static type checker happy 1064 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1065 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1066 1067 case = ( 1068 exp.case(self.func("TYPEOF", arg)) 1069 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1070 .else_( 1071 exp.Anonymous(this="LENGTH", expressions=[varchar]) 
1072 ) # anonymous to break length_sql recursion 1073 ) 1074 1075 return self.sql(case) 1076 1077 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1078 this = expression.this 1079 key = expression.args.get("key") 1080 key_sql = key.name if isinstance(key, exp.Expression) else "" 1081 value_sql = self.sql(expression, "value") 1082 1083 kv_sql = f"{key_sql} := {value_sql}" 1084 1085 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1086 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1087 if isinstance(this, exp.Struct) and not this.expressions: 1088 return self.func("STRUCT_PACK", kv_sql) 1089 1090 return self.func("STRUCT_INSERT", this, kv_sql) 1091 1092 def unnest_sql(self, expression: exp.Unnest) -> str: 1093 explode_array = expression.args.get("explode_array") 1094 if explode_array: 1095 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1096 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1097 expression.expressions.append( 1098 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1099 ) 1100 1101 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1102 alias = expression.args.get("alias") 1103 if alias: 1104 expression.set("alias", None) 1105 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1106 1107 unnest_sql = super().unnest_sql(expression) 1108 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1109 return self.sql(select) 1110 1111 return super().unnest_sql(expression) 1112 1113 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1114 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1115 # DuckDB should render IGNORE NULLS only for the general-purpose 1116 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1117 return super().ignorenulls_sql(expression) 1118 1119 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1120 return self.sql(expression, "this") 1121 1122 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1123 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1124 # DuckDB should render RESPECT NULLS only for the general-purpose 1125 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1126 return super().respectnulls_sql(expression) 1127 1128 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1129 return self.sql(expression, "this") 1130 1131 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1132 this = self.sql(expression, "this") 1133 null_text = self.sql(expression, "null") 1134 1135 if null_text: 1136 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1137 1138 return self.func("ARRAY_TO_STRING", this, expression.expression) 1139 1140 @unsupported_args("position", "occurrence") 1141 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1142 group = expression.args.get("group") 1143 params = expression.args.get("parameters") 1144 1145 # Do not render group if there is no following argument, 1146 # and it's the default value for this dialect 1147 if ( 1148 not params 1149 and group 1150 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1151 ): 1152 group = None 1153 return self.func( 1154 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1155 ) 1156 1157 @unsupported_args("culture") 1158 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1159 fmt = expression.args.get("format") 1160 if fmt and fmt.is_int: 1161 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1162 1163 self.unsupported("Only integer formats are supported by NumberToStr") 1164 return self.function_fallback_sql(expression) 1165 1166 def autoincrementcolumnconstraint_sql(self, _) -> str: 1167 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1168 return "" 1169 1170 def aliases_sql(self, expression: exp.Aliases) -> str: 1171 this = expression.this 1172 if isinstance(this, exp.Posexplode): 1173 return self.posexplode_sql(this) 1174 1175 return super().aliases_sql(expression) 1176 1177 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1178 this = expression.this 1179 parent = expression.parent 1180 1181 # The default Spark aliases are "pos" and "col", unless specified otherwise 1182 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1183 1184 if isinstance(parent, exp.Aliases): 1185 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1186 pos, col = parent.expressions 1187 elif isinstance(parent, exp.Table): 1188 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1189 alias = parent.args.get("alias") 1190 if alias: 1191 pos, col = alias.columns or [pos, col] 1192 alias.pop() 1193 1194 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1195 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1196 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1197 gen_subscripts = self.sql( 1198 exp.Alias( 1199 this=exp.Anonymous( 1200 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1201 ) 1202 - exp.Literal.number(1), 1203 alias=pos, 1204 ) 1205 ) 1206 1207 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1208 1209 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1210 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1211 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1212 1213 return posexplode_sql
- Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
- A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
- Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
- Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects which don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
- Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
- Whether number literals can include underscores for better readability.
- Specifies the strategy according to which identifiers should be normalized.
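These dialect settings are easiest to observe through the Python API. A minimal sketch, round-tripping through the DuckDB dialect (exact output formatting may vary across sqlglot versions):

import sqlglot

# ORDER BY ALL is supported natively by DuckDB, so it round-trips unchanged.
print(sqlglot.transpile("SELECT a, b FROM t ORDER BY ALL", read="duckdb", write="duckdb")[0])

# x::INT[3] is parsed as a fixed-size array cast rather than a subscript.
print(sqlglot.transpile("SELECT x::INT[3]", read="duckdb", write="duckdb")[0])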
300 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 301 if isinstance(path, exp.Literal): 302 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 303 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 304 # This check ensures we'll avoid trying to parse these as JSON paths, which can 305 # either result in a noisy warning or in an invalid representation of the path. 306 path_text = path.name 307 if path_text.startswith("/") or "[#" in path_text: 308 return path 309 310 return super().to_json_path(path)
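The guard above can be exercised by round-tripping both path styles; a small sketch (outputs indicative):

import sqlglot

# A '$'-style path is parsed into a structured JSON path.
print(sqlglot.transpile("SELECT col -> '$.a.b'", read="duckdb", write="duckdb")[0])

# JSON pointer ('/'-prefixed) and back-of-list ('[#-1]') paths are kept as
# opaque literals instead of being parsed as JSONPath expressions.
print(sqlglot.transpile("SELECT col -> '/a/b'", read="duckdb", write="duckdb")[0])
print(sqlglot.transpile("SELECT col -> '$.a[#-1]'", read="duckdb", write="duckdb")[0])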
312 class Tokenizer(tokens.Tokenizer): 313 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 314 HEREDOC_STRINGS = ["$"] 315 316 HEREDOC_TAG_IS_IDENTIFIER = True 317 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "//": TokenType.DIV, 322 "**": TokenType.DSTAR, 323 "^@": TokenType.CARET_AT, 324 "@>": TokenType.AT_GT, 325 "<@": TokenType.LT_AT, 326 "ATTACH": TokenType.ATTACH, 327 "BINARY": TokenType.VARBINARY, 328 "BITSTRING": TokenType.BIT, 329 "BPCHAR": TokenType.TEXT, 330 "CHAR": TokenType.TEXT, 331 "DATETIME": TokenType.TIMESTAMPNTZ, 332 "DETACH": TokenType.DETACH, 333 "EXCLUDE": TokenType.EXCEPT, 334 "LOGICAL": TokenType.BOOLEAN, 335 "ONLY": TokenType.ONLY, 336 "PIVOT_WIDER": TokenType.PIVOT, 337 "POSITIONAL": TokenType.POSITIONAL, 338 "SIGNED": TokenType.INT, 339 "STRING": TokenType.TEXT, 340 "SUMMARIZE": TokenType.SUMMARIZE, 341 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 342 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 343 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 344 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 345 "TIMESTAMP_US": TokenType.TIMESTAMP, 346 "UBIGINT": TokenType.UBIGINT, 347 "UINTEGER": TokenType.UINT, 348 "USMALLINT": TokenType.USMALLINT, 349 "UTINYINT": TokenType.UTINYINT, 350 "VARCHAR": TokenType.TEXT, 351 } 352 KEYWORDS.pop("/*+") 353 354 SINGLE_TOKENS = { 355 **tokens.Tokenizer.SINGLE_TOKENS, 356 "$": TokenType.PARAMETER, 357 } 358 359 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
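Several of these keyword entries normalize DuckDB's type aliases and operators at tokenization time; a brief sketch (outputs indicative):

import sqlglot

# STRING, BPCHAR and CHAR all tokenize to TEXT.
print(sqlglot.transpile("SELECT CAST(x AS STRING)", read="duckdb", write="duckdb")[0])

# '//' is integer division; '**' (and '^') parse as exponentiation.
print(sqlglot.parse_one("SELECT 7 // 2, 2 ** 3", read="duckdb").sql("duckdb"))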
361 class Parser(parser.Parser): 362 BITWISE = { 363 **parser.Parser.BITWISE, 364 TokenType.TILDA: exp.RegexpLike, 365 } 366 BITWISE.pop(TokenType.CARET) 367 368 RANGE_PARSERS = { 369 **parser.Parser.RANGE_PARSERS, 370 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 371 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 372 } 373 374 EXPONENT = { 375 **parser.Parser.EXPONENT, 376 TokenType.CARET: exp.Pow, 377 TokenType.DSTAR: exp.Pow, 378 } 379 380 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 381 382 SHOW_PARSERS = { 383 "TABLES": _show_parser("TABLES"), 384 "ALL TABLES": _show_parser("ALL TABLES"), 385 } 386 387 FUNCTIONS = { 388 **parser.Parser.FUNCTIONS, 389 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 390 "ARRAY_SORT": exp.SortArray.from_arg_list, 391 "DATEDIFF": _build_date_diff, 392 "DATE_DIFF": _build_date_diff, 393 "DATE_TRUNC": date_trunc_to_time, 394 "DATETRUNC": date_trunc_to_time, 395 "DECODE": lambda args: exp.Decode( 396 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 397 ), 398 "EDITDIST3": exp.Levenshtein.from_arg_list, 399 "ENCODE": lambda args: exp.Encode( 400 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 401 ), 402 "EPOCH": exp.TimeToUnix.from_arg_list, 403 "EPOCH_MS": lambda args: exp.UnixToTime( 404 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 405 ), 406 "GENERATE_SERIES": _build_generate_series(), 407 "JSON": exp.ParseJSON.from_arg_list, 408 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 409 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 410 "LIST_HAS": exp.ArrayContains.from_arg_list, 411 "LIST_REVERSE_SORT": _build_sort_array_desc, 412 "LIST_SORT": exp.SortArray.from_arg_list, 413 "LIST_VALUE": lambda args: exp.Array(expressions=args), 414 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 415 "MAKE_TIMESTAMP": _build_make_timestamp, 416 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 417 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 418 "RANGE": _build_generate_series(end_exclusive=True), 419 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 420 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 421 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 422 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 423 this=seq_get(args, 0), 424 expression=seq_get(args, 1), 425 replacement=seq_get(args, 2), 426 modifiers=seq_get(args, 3), 427 ), 428 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 429 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 430 "STRING_SPLIT": exp.Split.from_arg_list, 431 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 432 "STRING_TO_ARRAY": exp.Split.from_arg_list, 433 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 434 "STRUCT_PACK": exp.Struct.from_arg_list, 435 "STR_SPLIT": exp.Split.from_arg_list, 436 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 437 "TIME_BUCKET": exp.DateBin.from_arg_list, 438 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 439 "UNNEST": exp.Explode.from_arg_list, 440 "XOR": binary_from_function(exp.BitwiseXor), 441 } 442 443 FUNCTIONS.pop("DATE_SUB") 444 FUNCTIONS.pop("GLOB") 445 446 FUNCTION_PARSERS = { 447 **parser.Parser.FUNCTION_PARSERS, 448 **dict.fromkeys( 449 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 450 ), 451 } 452 FUNCTION_PARSERS.pop("DECODE") 453 454 NO_PAREN_FUNCTION_PARSERS = { 455 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 456 "MAP": 
lambda self: self._parse_map(), 457 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 458 } 459 460 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 461 TokenType.SEMI, 462 TokenType.ANTI, 463 } 464 465 PLACEHOLDER_PARSERS = { 466 **parser.Parser.PLACEHOLDER_PARSERS, 467 TokenType.PARAMETER: lambda self: ( 468 self.expression(exp.Placeholder, this=self._prev.text) 469 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 470 else None 471 ), 472 } 473 474 TYPE_CONVERTERS = { 475 # https://duckdb.org/docs/sql/data_types/numeric 476 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 477 # https://duckdb.org/docs/sql/data_types/text 478 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 479 } 480 481 STATEMENT_PARSERS = { 482 **parser.Parser.STATEMENT_PARSERS, 483 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 484 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 485 TokenType.SHOW: lambda self: self._parse_show(), 486 } 487 488 def _parse_expression(self) -> t.Optional[exp.Expression]: 489 # DuckDB supports prefix aliases, e.g. foo: 1 490 if self._next and self._next.token_type == TokenType.COLON: 491 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 492 self._match(TokenType.COLON) 493 comments = self._prev_comments or [] 494 495 this = self._parse_assignment() 496 if isinstance(this, exp.Expression): 497 # Moves the comment next to the alias in `alias: expr /* comment */` 498 comments += this.pop_comments() or [] 499 500 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 501 502 return super()._parse_expression() 503 504 def _parse_table( 505 self, 506 schema: bool = False, 507 joins: bool = False, 508 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 509 parse_bracket: bool = False, 510 is_db_reference: bool = False, 511 parse_partition: bool = False, 512 consume_pipe: bool = False, 513 ) -> t.Optional[exp.Expression]: 514 # DuckDB supports prefix aliases, e.g. 
FROM foo: bar 515 if self._next and self._next.token_type == TokenType.COLON: 516 alias = self._parse_table_alias( 517 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 518 ) 519 self._match(TokenType.COLON) 520 comments = self._prev_comments or [] 521 else: 522 alias = None 523 comments = [] 524 525 table = super()._parse_table( 526 schema=schema, 527 joins=joins, 528 alias_tokens=alias_tokens, 529 parse_bracket=parse_bracket, 530 is_db_reference=is_db_reference, 531 parse_partition=parse_partition, 532 ) 533 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 534 # Moves the comment next to the alias in `alias: table /* comment */` 535 comments += table.pop_comments() or [] 536 alias.comments = alias.pop_comments() + comments 537 table.set("alias", alias) 538 539 return table 540 541 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 542 # https://duckdb.org/docs/sql/samples.html 543 sample = super()._parse_table_sample(as_modifier=as_modifier) 544 if sample and not sample.args.get("method"): 545 if sample.args.get("size"): 546 sample.set("method", exp.var("RESERVOIR")) 547 else: 548 sample.set("method", exp.var("SYSTEM")) 549 550 return sample 551 552 def _parse_bracket( 553 self, this: t.Optional[exp.Expression] = None 554 ) -> t.Optional[exp.Expression]: 555 bracket = super()._parse_bracket(this) 556 557 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 558 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 559 bracket.set("returns_list_for_maps", True) 560 561 return bracket 562 563 def _parse_map(self) -> exp.ToMap | exp.Map: 564 if self._match(TokenType.L_BRACE, advance=False): 565 return self.expression(exp.ToMap, this=self._parse_bracket()) 566 567 args = self._parse_wrapped_csv(self._parse_assignment) 568 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 569 570 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 571 return self._parse_field_def() 572 573 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 574 if len(aggregations) == 1: 575 return super()._pivot_column_names(aggregations) 576 return pivot_column_names(aggregations, dialect="duckdb") 577 578 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 579 def _parse_attach_option() -> exp.AttachOption: 580 return self.expression( 581 exp.AttachOption, 582 this=self._parse_var(any_token=True), 583 expression=self._parse_field(any_token=True), 584 ) 585 586 self._match(TokenType.DATABASE) 587 exists = self._parse_exists(not_=is_attach) 588 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 589 590 if self._match(TokenType.L_PAREN, advance=False): 591 expressions = self._parse_wrapped_csv(_parse_attach_option) 592 else: 593 expressions = None 594 595 return ( 596 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 597 if is_attach 598 else self.expression(exp.Detach, this=this, exists=exists) 599 ) 600 601 def _parse_show_duckdb(self, this: str) -> exp.Show: 602 return self.expression(exp.Show, this=this)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
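Two DuckDB-specific features handled by this parser are prefix aliases and ATTACH/DETACH statements; a hedged sketch (output shapes indicative, assuming a recent sqlglot version):

import sqlglot

# Prefix aliases (alias: expr, alias: table) are parsed into ordinary aliases.
print(sqlglot.parse_one("SELECT total: price * qty FROM o: orders", read="duckdb").sql("duckdb"))

# ATTACH options are parsed into exp.Attach with exp.AttachOption children.
attach = sqlglot.parse_one("ATTACH DATABASE 'file.db' AS fdb (TYPE SQLITE)", read="duckdb")
print(attach.sql("duckdb"))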
604 class Generator(generator.Generator): 605 PARAMETER_TOKEN = "$" 606 NAMED_PLACEHOLDER_TOKEN = "$" 607 JOIN_HINTS = False 608 TABLE_HINTS = False 609 QUERY_HINTS = False 610 LIMIT_FETCH = "LIMIT" 611 STRUCT_DELIMITER = ("(", ")") 612 RENAME_TABLE_WITH_DB = False 613 NVL2_SUPPORTED = False 614 SEMI_ANTI_JOIN_WITH_SIDE = False 615 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 616 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 617 LAST_DAY_SUPPORTS_DATE_PART = False 618 JSON_KEY_VALUE_PAIR_SEP = "," 619 IGNORE_NULLS_IN_FUNC = True 620 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 621 SUPPORTS_CREATE_TABLE_LIKE = False 622 MULTI_ARG_DISTINCT = False 623 CAN_IMPLEMENT_ARRAY_ANY = True 624 SUPPORTS_TO_NUMBER = False 625 SUPPORTS_WINDOW_EXCLUDE = True 626 COPY_HAS_INTO_KEYWORD = False 627 STAR_EXCEPT = "EXCLUDE" 628 PAD_FILL_PATTERN_IS_REQUIRED = True 629 ARRAY_CONCAT_IS_VAR_LEN = False 630 ARRAY_SIZE_DIM_REQUIRED = False 631 NORMALIZE_EXTRACT_DATE_PARTS = True 632 633 TRANSFORMS = { 634 **generator.Generator.TRANSFORMS, 635 exp.ApproxDistinct: approx_count_distinct_sql, 636 exp.Array: inline_array_unless_query, 637 exp.ArrayFilter: rename_func("LIST_FILTER"), 638 exp.ArrayRemove: remove_from_array_using_filter, 639 exp.ArraySort: _array_sort_sql, 640 exp.ArraySum: rename_func("LIST_SUM"), 641 exp.BitwiseXor: rename_func("XOR"), 642 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 643 exp.CurrentDate: lambda *_: "CURRENT_DATE", 644 exp.CurrentTime: lambda *_: "CURRENT_TIME", 645 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 646 exp.DayOfMonth: rename_func("DAYOFMONTH"), 647 exp.DayOfWeek: rename_func("DAYOFWEEK"), 648 exp.DayOfWeekIso: rename_func("ISODOW"), 649 exp.DayOfYear: rename_func("DAYOFYEAR"), 650 exp.DataType: _datatype_sql, 651 exp.Date: _date_sql, 652 exp.DateAdd: _date_delta_sql, 653 exp.DateFromParts: rename_func("MAKE_DATE"), 654 exp.DateSub: _date_delta_sql, 655 exp.DateDiff: _date_diff_sql, 656 exp.DateStrToDate: datestrtodate_sql, 657 exp.Datetime: no_datetime_sql, 658 exp.DatetimeSub: _date_delta_sql, 659 exp.DatetimeAdd: _date_delta_sql, 660 exp.DateToDi: lambda self, 661 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 662 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 663 exp.DiToDate: lambda self, 664 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 665 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 666 exp.GenerateDateArray: _generate_datetime_array_sql, 667 exp.GenerateTimestampArray: _generate_datetime_array_sql, 668 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 669 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 670 exp.Explode: rename_func("UNNEST"), 671 exp.IntDiv: lambda self, e: self.binary(e, "//"), 672 exp.IsInf: rename_func("ISINF"), 673 exp.IsNan: rename_func("ISNAN"), 674 exp.JSONBExists: rename_func("JSON_EXISTS"), 675 exp.JSONExtract: _arrow_json_extract_sql, 676 exp.JSONExtractArray: _json_extract_value_array_sql, 677 exp.JSONExtractScalar: _arrow_json_extract_sql, 678 exp.JSONFormat: _json_format_sql, 679 exp.JSONValueArray: _json_extract_value_array_sql, 680 exp.Lateral: explode_to_unnest_sql, 681 exp.LogicalOr: rename_func("BOOL_OR"), 682 exp.LogicalAnd: rename_func("BOOL_AND"), 683 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 684 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 685 
exp.MonthsBetween: lambda self, e: self.func( 686 "DATEDIFF", 687 "'month'", 688 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 689 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 690 ), 691 exp.PercentileCont: rename_func("QUANTILE_CONT"), 692 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 693 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 694 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 695 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 696 exp.RegexpReplace: lambda self, e: self.func( 697 "REGEXP_REPLACE", 698 e.this, 699 e.expression, 700 e.args.get("replacement"), 701 e.args.get("modifiers"), 702 ), 703 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 704 exp.RegexpILike: lambda self, e: self.func( 705 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 706 ), 707 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 708 exp.Return: lambda self, e: self.sql(e, "this"), 709 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 710 exp.Rand: rename_func("RANDOM"), 711 exp.SHA: rename_func("SHA1"), 712 exp.SHA2: sha256_sql, 713 exp.Split: rename_func("STR_SPLIT"), 714 exp.SortArray: _sort_array_sql, 715 exp.StrPosition: strposition_sql, 716 exp.StrToUnix: lambda self, e: self.func( 717 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 718 ), 719 exp.Struct: _struct_sql, 720 exp.Transform: rename_func("LIST_TRANSFORM"), 721 exp.TimeAdd: _date_delta_sql, 722 exp.Time: no_time_sql, 723 exp.TimeDiff: _timediff_sql, 724 exp.Timestamp: no_timestamp_sql, 725 exp.TimestampDiff: lambda self, e: self.func( 726 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 727 ), 728 exp.TimestampTrunc: timestamptrunc_sql(), 729 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 730 exp.TimeStrToTime: timestrtotime_sql, 731 exp.TimeStrToUnix: lambda self, e: self.func( 732 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 733 ), 734 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 735 exp.TimeToUnix: rename_func("EPOCH"), 736 exp.TsOrDiToDi: lambda self, 737 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 738 exp.TsOrDsAdd: _date_delta_sql, 739 exp.TsOrDsDiff: lambda self, e: self.func( 740 "DATE_DIFF", 741 f"'{e.args.get('unit') or 'DAY'}'", 742 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 743 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 744 ), 745 exp.UnixToStr: lambda self, e: self.func( 746 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 747 ), 748 exp.DatetimeTrunc: lambda self, e: self.func( 749 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 750 ), 751 exp.UnixToTime: _unix_to_time_sql, 752 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 753 exp.VariancePop: rename_func("VAR_POP"), 754 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 755 exp.Xor: bool_xor_sql, 756 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 757 rename_func("LEVENSHTEIN") 758 ), 759 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 760 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 761 exp.DateBin: rename_func("TIME_BUCKET"), 762 } 763 764 SUPPORTED_JSON_PATH_PARTS = { 765 exp.JSONPathKey, 766 exp.JSONPathRoot, 767 exp.JSONPathSubscript, 768 exp.JSONPathWildcard, 769 
} 770 771 TYPE_MAPPING = { 772 **generator.Generator.TYPE_MAPPING, 773 exp.DataType.Type.BINARY: "BLOB", 774 exp.DataType.Type.BPCHAR: "TEXT", 775 exp.DataType.Type.CHAR: "TEXT", 776 exp.DataType.Type.DATETIME: "TIMESTAMP", 777 exp.DataType.Type.FLOAT: "REAL", 778 exp.DataType.Type.JSONB: "JSON", 779 exp.DataType.Type.NCHAR: "TEXT", 780 exp.DataType.Type.NVARCHAR: "TEXT", 781 exp.DataType.Type.UINT: "UINTEGER", 782 exp.DataType.Type.VARBINARY: "BLOB", 783 exp.DataType.Type.ROWVERSION: "BLOB", 784 exp.DataType.Type.VARCHAR: "TEXT", 785 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 786 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 787 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 788 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 789 } 790 791 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 792 RESERVED_KEYWORDS = { 793 "array", 794 "analyse", 795 "union", 796 "all", 797 "when", 798 "in_p", 799 "default", 800 "create_p", 801 "window", 802 "asymmetric", 803 "to", 804 "else", 805 "localtime", 806 "from", 807 "end_p", 808 "select", 809 "current_date", 810 "foreign", 811 "with", 812 "grant", 813 "session_user", 814 "or", 815 "except", 816 "references", 817 "fetch", 818 "limit", 819 "group_p", 820 "leading", 821 "into", 822 "collate", 823 "offset", 824 "do", 825 "then", 826 "localtimestamp", 827 "check_p", 828 "lateral_p", 829 "current_role", 830 "where", 831 "asc_p", 832 "placing", 833 "desc_p", 834 "user", 835 "unique", 836 "initially", 837 "column", 838 "both", 839 "some", 840 "as", 841 "any", 842 "only", 843 "deferrable", 844 "null_p", 845 "current_time", 846 "true_p", 847 "table", 848 "case", 849 "trailing", 850 "variadic", 851 "for", 852 "on", 853 "distinct", 854 "false_p", 855 "not", 856 "constraint", 857 "current_timestamp", 858 "returning", 859 "primary", 860 "intersect", 861 "having", 862 "analyze", 863 "current_user", 864 "and", 865 "cast", 866 "symmetric", 867 "using", 868 "order", 869 "current_catalog", 870 } 871 872 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 873 874 # DuckDB doesn't generally support CREATE TABLE .. properties 875 # https://duckdb.org/docs/sql/statements/create_table.html 876 PROPERTIES_LOCATION = { 877 prop: exp.Properties.Location.UNSUPPORTED 878 for prop in generator.Generator.PROPERTIES_LOCATION 879 } 880 881 # There are a few exceptions (e.g. 
temporary tables) which are supported or 882 # can be transpiled to DuckDB, so we explicitly override them accordingly 883 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 884 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 885 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 886 887 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 888 exp.FirstValue, 889 exp.Lag, 890 exp.LastValue, 891 exp.Lead, 892 exp.NthValue, 893 ) 894 895 def show_sql(self, expression: exp.Show) -> str: 896 return f"SHOW {expression.name}" 897 898 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 899 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 900 901 def strtotime_sql(self, expression: exp.StrToTime) -> str: 902 if expression.args.get("safe"): 903 formatted_time = self.format_time(expression) 904 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 905 return str_to_time_sql(self, expression) 906 907 def strtodate_sql(self, expression: exp.StrToDate) -> str: 908 if expression.args.get("safe"): 909 formatted_time = self.format_time(expression) 910 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 911 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 912 913 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 914 arg = expression.this 915 if expression.args.get("safe"): 916 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 917 return self.func("JSON", arg) 918 919 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 920 nano = expression.args.get("nano") 921 if nano is not None: 922 expression.set( 923 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 924 ) 925 926 return rename_func("MAKE_TIME")(self, expression) 927 928 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 929 sec = expression.args["sec"] 930 931 milli = expression.args.get("milli") 932 if milli is not None: 933 sec += milli.pop() / exp.Literal.number(1000.0) 934 935 nano = expression.args.get("nano") 936 if nano is not None: 937 sec += nano.pop() / exp.Literal.number(1000000000.0) 938 939 if milli or nano: 940 expression.set("sec", sec) 941 942 return rename_func("MAKE_TIMESTAMP")(self, expression) 943 944 def tablesample_sql( 945 self, 946 expression: exp.TableSample, 947 tablesample_keyword: t.Optional[str] = None, 948 ) -> str: 949 if not isinstance(expression.parent, exp.Select): 950 # This sample clause only applies to a single source, not the entire resulting relation 951 tablesample_keyword = "TABLESAMPLE" 952 953 if expression.args.get("size"): 954 method = expression.args.get("method") 955 if method and method.name.upper() != "RESERVOIR": 956 self.unsupported( 957 f"Sampling method {method} is not supported with a discrete sample count, " 958 "defaulting to reservoir sampling" 959 ) 960 expression.set("method", exp.var("RESERVOIR")) 961 962 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 963 964 def interval_sql(self, expression: exp.Interval) -> str: 965 multiplier: t.Optional[int] = None 966 unit = expression.text("unit").lower() 967 968 if unit.startswith("week"): 969 multiplier = 7 970 if unit.startswith("quarter"): 971 multiplier = 90 972 973 if multiplier: 974 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 975 976 
return super().interval_sql(expression) 977 978 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 979 if isinstance(expression.parent, exp.UserDefinedFunction): 980 return self.sql(expression, "this") 981 return super().columndef_sql(expression, sep) 982 983 def join_sql(self, expression: exp.Join) -> str: 984 if ( 985 expression.side == "LEFT" 986 and not expression.args.get("on") 987 and isinstance(expression.this, exp.Unnest) 988 ): 989 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 990 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 991 return super().join_sql(expression.on(exp.true())) 992 993 return super().join_sql(expression) 994 995 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 996 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 997 if expression.args.get("is_end_exclusive"): 998 return rename_func("RANGE")(self, expression) 999 1000 return self.function_fallback_sql(expression) 1001 1002 def countif_sql(self, expression: exp.CountIf) -> str: 1003 if self.dialect.version >= Version("1.2"): 1004 return self.function_fallback_sql(expression) 1005 1006 # https://github.com/tobymao/sqlglot/pull/4749 1007 return count_if_to_sum(self, expression) 1008 1009 def bracket_sql(self, expression: exp.Bracket) -> str: 1010 if self.dialect.version >= Version("1.2"): 1011 return super().bracket_sql(expression) 1012 1013 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1014 this = expression.this 1015 if isinstance(this, exp.Array): 1016 this.replace(exp.paren(this)) 1017 1018 bracket = super().bracket_sql(expression) 1019 1020 if not expression.args.get("returns_list_for_maps"): 1021 if not this.type: 1022 from sqlglot.optimizer.annotate_types import annotate_types 1023 1024 this = annotate_types(this, dialect=self.dialect) 1025 1026 if this.is_type(exp.DataType.Type.MAP): 1027 bracket = f"({bracket})[1]" 1028 1029 return bracket 1030 1031 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1032 expression_sql = self.sql(expression, "expression") 1033 1034 func = expression.this 1035 if isinstance(func, exp.PERCENTILES): 1036 # Make the order key the first arg and slide the fraction to the right 1037 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1038 order_col = expression.find(exp.Ordered) 1039 if order_col: 1040 func.set("expression", func.this) 1041 func.set("this", order_col.this) 1042 1043 this = self.sql(expression, "this").rstrip(")") 1044 1045 return f"{this}{expression_sql})" 1046 1047 def length_sql(self, expression: exp.Length) -> str: 1048 arg = expression.this 1049 1050 # Dialects like BQ and Snowflake also accept binary values as args, so 1051 # DDB will attempt to infer the type or resort to case/when resolution 1052 if not expression.args.get("binary") or arg.is_string: 1053 return self.func("LENGTH", arg) 1054 1055 if not arg.type: 1056 from sqlglot.optimizer.annotate_types import annotate_types 1057 1058 arg = annotate_types(arg, dialect=self.dialect) 1059 1060 if arg.is_type(*exp.DataType.TEXT_TYPES): 1061 return self.func("LENGTH", arg) 1062 1063 # We need these casts to make duckdb's static type checker happy 1064 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1065 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1066 1067 case = ( 1068 exp.case(self.func("TYPEOF", arg)) 1069 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1070 .else_( 1071 exp.Anonymous(this="LENGTH", expressions=[varchar]) 
1072 ) # anonymous to break length_sql recursion 1073 ) 1074 1075 return self.sql(case) 1076 1077 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1078 this = expression.this 1079 key = expression.args.get("key") 1080 key_sql = key.name if isinstance(key, exp.Expression) else "" 1081 value_sql = self.sql(expression, "value") 1082 1083 kv_sql = f"{key_sql} := {value_sql}" 1084 1085 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1086 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1087 if isinstance(this, exp.Struct) and not this.expressions: 1088 return self.func("STRUCT_PACK", kv_sql) 1089 1090 return self.func("STRUCT_INSERT", this, kv_sql) 1091 1092 def unnest_sql(self, expression: exp.Unnest) -> str: 1093 explode_array = expression.args.get("explode_array") 1094 if explode_array: 1095 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1096 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1097 expression.expressions.append( 1098 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1099 ) 1100 1101 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1102 alias = expression.args.get("alias") 1103 if alias: 1104 expression.set("alias", None) 1105 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 1106 1107 unnest_sql = super().unnest_sql(expression) 1108 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1109 return self.sql(select) 1110 1111 return super().unnest_sql(expression) 1112 1113 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1114 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1115 # DuckDB should render IGNORE NULLS only for the general-purpose 1116 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1117 return super().ignorenulls_sql(expression) 1118 1119 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1120 return self.sql(expression, "this") 1121 1122 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1123 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1124 # DuckDB should render RESPECT NULLS only for the general-purpose 1125 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 
1126 return super().respectnulls_sql(expression) 1127 1128 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1129 return self.sql(expression, "this") 1130 1131 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1132 this = self.sql(expression, "this") 1133 null_text = self.sql(expression, "null") 1134 1135 if null_text: 1136 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1137 1138 return self.func("ARRAY_TO_STRING", this, expression.expression) 1139 1140 @unsupported_args("position", "occurrence") 1141 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1142 group = expression.args.get("group") 1143 params = expression.args.get("parameters") 1144 1145 # Do not render group if there is no following argument, 1146 # and it's the default value for this dialect 1147 if ( 1148 not params 1149 and group 1150 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1151 ): 1152 group = None 1153 return self.func( 1154 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1155 ) 1156 1157 @unsupported_args("culture") 1158 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1159 fmt = expression.args.get("format") 1160 if fmt and fmt.is_int: 1161 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1162 1163 self.unsupported("Only integer formats are supported by NumberToStr") 1164 return self.function_fallback_sql(expression) 1165 1166 def autoincrementcolumnconstraint_sql(self, _) -> str: 1167 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1168 return "" 1169 1170 def aliases_sql(self, expression: exp.Aliases) -> str: 1171 this = expression.this 1172 if isinstance(this, exp.Posexplode): 1173 return self.posexplode_sql(this) 1174 1175 return super().aliases_sql(expression) 1176 1177 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1178 this = expression.this 1179 parent = expression.parent 1180 1181 # The default Spark aliases are "pos" and "col", unless specified otherwise 1182 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1183 1184 if isinstance(parent, exp.Aliases): 1185 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1186 pos, col = parent.expressions 1187 elif isinstance(parent, exp.Table): 1188 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1189 alias = parent.args.get("alias") 1190 if alias: 1191 pos, col = alias.columns or [pos, col] 1192 alias.pop() 1193 1194 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1195 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1196 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1197 gen_subscripts = self.sql( 1198 exp.Alias( 1199 this=exp.Anonymous( 1200 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1201 ) 1202 - exp.Literal.number(1), 1203 alias=pos, 1204 ) 1205 ) 1206 1207 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1208 1209 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1210 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1211 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1212 1213 return posexplode_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
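These options are normally supplied through the top-level API rather than by constructing a generator by hand. A minimal sketch (the query and option values here are made up for illustration):

import sqlglot

# pretty, indent and identify are the generator options documented above,
# forwarded as keyword arguments through sqlglot.transpile.
sql = "SELECT col_a, col_b FROM tbl WHERE col_a > 1"
print(sqlglot.transpile(sql, write="duckdb", pretty=True, indent=2, identify=True)[0])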
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    if expression.args.get("safe"):
        formatted_time = self.format_time(expression)
        return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
    return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
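For example, Snowflake's TRY_TO_DATE parses into a "safe" exp.StrToDate, which this method renders with TRY_STRPTIME. A sketch (the format translation is handled by format_time, and the exact output may vary across sqlglot versions):

import sqlglot

# A "safe" parse returns NULL instead of erroring, hence TRY_STRPTIME.
sql = "SELECT TRY_TO_DATE(x, 'yyyy-mm-dd')"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# Expected shape: SELECT CAST(TRY_STRPTIME(x, '%Y-%m-%d') AS DATE)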
def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
    nano = expression.args.get("nano")
    if nano is not None:
        expression.set(
            "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
        )

    return rename_func("MAKE_TIME")(self, expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    sec = expression.args["sec"]

    milli = expression.args.get("milli")
    if milli is not None:
        sec += milli.pop() / exp.Literal.number(1000.0)

    nano = expression.args.get("nano")
    if nano is not None:
        sec += nano.pop() / exp.Literal.number(1000000000.0)

    if milli or nano:
        expression.set("sec", sec)

    return rename_func("MAKE_TIMESTAMP")(self, expression)
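Both *_FROM_PARTS translations fold sub-second components into the seconds argument, since MAKE_TIME and MAKE_TIMESTAMP have no milli/nano slots. An illustrative round trip from Snowflake (exact literal rendering may differ):

import sqlglot

# Snowflake's optional nanosecond argument is divided by 1e9 and
# added onto the seconds argument of MAKE_TIMESTAMP.
sql = "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5, 500000000)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# Expected shape: SELECT MAKE_TIMESTAMP(2024, 1, 2, 3, 4, 5 + (500000000 / 1000000000.0))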
def tablesample_sql(
    self,
    expression: exp.TableSample,
    tablesample_keyword: t.Optional[str] = None,
) -> str:
    if not isinstance(expression.parent, exp.Select):
        # This sample clause only applies to a single source, not the entire resulting relation
        tablesample_keyword = "TABLESAMPLE"

    if expression.args.get("size"):
        method = expression.args.get("method")
        if method and method.name.upper() != "RESERVOIR":
            self.unsupported(
                f"Sampling method {method} is not supported with a discrete sample count, "
                "defaulting to reservoir sampling"
            )
            expression.set("method", exp.var("RESERVOIR"))

    return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
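A sketch of the reservoir fallback, assuming the generic parser accepts this standard-ish TABLESAMPLE syntax (an "unsupported" warning is emitted at the default ErrorLevel.WARN, and the exact output shape may differ):

import sqlglot

# A discrete row count with a non-RESERVOIR method is rewritten, since DuckDB
# only supports reservoir sampling for fixed-size samples.
sql = "SELECT * FROM t TABLESAMPLE SYSTEM (10 ROWS)"
print(sqlglot.transpile(sql, write="duckdb")[0])
# Expected shape: SELECT * FROM t USING SAMPLE RESERVOIR (10 ROWS)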
def interval_sql(self, expression: exp.Interval) -> str:
    multiplier: t.Optional[int] = None
    unit = expression.text("unit").lower()

    if unit.startswith("week"):
        multiplier = 7
    if unit.startswith("quarter"):
        multiplier = 90

    if multiplier:
        return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

    return super().interval_sql(expression)
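A quick check of the unit rewrite (illustrative):

import sqlglot

# WEEK intervals are emitted as day multiples; QUARTER becomes 90 days.
print(sqlglot.transpile("SELECT CURRENT_DATE + INTERVAL '2' WEEK", write="duckdb")[0])
# Expected shape: SELECT CURRENT_DATE + (7 * INTERVAL '2' DAY)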
def join_sql(self, expression: exp.Join) -> str:
    if (
        expression.side == "LEFT"
        and not expression.args.get("on")
        and isinstance(expression.this, exp.Unnest)
    ):
        # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
        # DuckDB doesn't, but we can just add a dummy ON clause that is always true
        return super().join_sql(expression.on(exp.true()))

    return super().join_sql(expression)
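For instance, BigQuery permits a LEFT JOIN against UNNEST with no join condition. A sketch (the UNNEST itself may be rewritten further, as unnest_sql below shows):

import sqlglot

# DuckDB requires an ON clause here, so an always-true one is appended.
sql = "SELECT * FROM t LEFT JOIN UNNEST(t.arr) AS a"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected to contain: LEFT JOIN ... ON TRUE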
def bracket_sql(self, expression: exp.Bracket) -> str:
    if self.dialect.version >= Version("1.2"):
        return super().bracket_sql(expression)

    # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes
    this = expression.this
    if isinstance(this, exp.Array):
        this.replace(exp.paren(this))

    bracket = super().bracket_sql(expression)

    if not expression.args.get("returns_list_for_maps"):
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if this.is_type(exp.DataType.Type.MAP):
            bracket = f"({bracket})[1]"

    return bracket
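The pre-1.2 behavior can be observed by pinning the target version, assuming the comma-separated dialect-settings syntax supports a version setting (a sketch):

import sqlglot

sql = "SELECT [1, 2, 3][1]"

# Recent DuckDB subscripts an array literal directly...
print(sqlglot.transpile(sql, read="duckdb", write="duckdb")[0])

# ...while versions before 1.2 need the literal parenthesized first,
# e.g. ([1, 2, 3])[1].
print(sqlglot.transpile(sql, read="duckdb", write="duckdb, version=1.0")[0])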
def withingroup_sql(self, expression: exp.WithinGroup) -> str:
    expression_sql = self.sql(expression, "expression")

    func = expression.this
    if isinstance(func, exp.PERCENTILES):
        # Make the order key the first arg and slide the fraction to the right
        # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
        order_col = expression.find(exp.Ordered)
        if order_col:
            func.set("expression", func.this)
            func.set("this", order_col.this)

    this = self.sql(expression, "this").rstrip(")")

    return f"{this}{expression_sql})"
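For the percentile family, this argument shuffle produces DuckDB's ordered-set aggregate signature. An illustrative example:

import sqlglot

# The ORDER BY key becomes the first argument and the fraction slides right.
sql = "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t"
print(sqlglot.transpile(sql, read="postgres", write="duckdb")[0])
# Expected shape: SELECT QUANTILE_CONT(x, 0.5) FROM t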
def length_sql(self, expression: exp.Length) -> str:
    arg = expression.this

    # Dialects like BQ and Snowflake also accept binary values as args, so
    # DDB will attempt to infer the type or resort to case/when resolution
    if not expression.args.get("binary") or arg.is_string:
        return self.func("LENGTH", arg)

    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg, dialect=self.dialect)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        return self.func("LENGTH", arg)

    # We need these casts to make duckdb's static type checker happy
    blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
    varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

    case = (
        exp.case(self.func("TYPEOF", arg))
        .when("'BLOB'", self.func("OCTET_LENGTH", blob))
        .else_(
            exp.Anonymous(this="LENGTH", expressions=[varchar])
        )  # anonymous to break length_sql recursion
    )

    return self.sql(case)
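When no type information is available, the runtime TYPEOF dispatch kicks in. A sketch (the exact casts depend on the dialect's type mappings):

import sqlglot

# BigQuery's LENGTH also accepts BYTES, so the argument is marked as
# possibly binary and resolved at runtime.
print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])
# Expected shape:
# SELECT CASE TYPEOF(col) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(col AS BLOB))
# ELSE LENGTH(CAST(col AS TEXT)) END FROM t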
def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
    this = expression.this
    key = expression.args.get("key")
    key_sql = key.name if isinstance(key, exp.Expression) else ""
    value_sql = self.sql(expression, "value")

    kv_sql = f"{key_sql} := {value_sql}"

    # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
    # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
    if isinstance(this, exp.Struct) and not this.expressions:
        return self.func("STRUCT_PACK", kv_sql)

    return self.func("STRUCT_INSERT", this, kv_sql)
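The empty-struct special case from the comment, end to end (illustrative):

import sqlglot

# Inserting into an empty object becomes STRUCT_PACK, since
# STRUCT_INSERT({}, k := v) is not valid DuckDB.
sql = "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)"
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# Expected shape: SELECT STRUCT_PACK(k := 1)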
def unnest_sql(self, expression: exp.Unnest) -> str:
    explode_array = expression.args.get("explode_array")
    if explode_array:
        # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
        # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
        expression.expressions.append(
            exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
        )

        # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
        alias = expression.args.get("alias")
        if alias:
            expression.set("alias", None)
            alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

        unnest_sql = super().unnest_sql(expression)
        select = exp.Select(expressions=[unnest_sql]).subquery(alias)
        return self.sql(select)

    return super().unnest_sql(expression)
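A sketch of the BigQuery explosion case (the output shape may vary across sqlglot versions):

import sqlglot

# UNNESTing an array of structs in BigQuery also flattens the struct fields,
# which DuckDB emulates with max_depth => 2 inside a subquery.
sql = "SELECT * FROM UNNEST([STRUCT(1 AS a), STRUCT(2 AS a)]) AS x"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected shape: SELECT * FROM (SELECT UNNEST([...], max_depth => 2)) AS x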
def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render IGNORE NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
        return super().ignorenulls_sql(expression)

    self.unsupported("IGNORE NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
def respectnulls_sql(self, expression: exp.RespectNulls) -> str:
    if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS):
        # DuckDB should render RESPECT NULLS only for the general-purpose
        # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...)
        return super().respectnulls_sql(expression)

    self.unsupported("RESPECT NULLS is not supported for non-window functions.")
    return self.sql(expression, "this")
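Both guards in action (the second call emits an "unsupported" warning at the default ErrorLevel.WARN; outputs are illustrative):

import sqlglot

# Kept: FIRST_VALUE is a window function that accepts IGNORE NULLS in DuckDB.
sql = "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])

# Dropped with a warning: DuckDB has no IGNORE NULLS for plain aggregates.
sql = "SELECT ARRAY_AGG(x IGNORE NULLS) FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])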
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
    this = self.sql(expression, "this")
    null_text = self.sql(expression, "null")

    if null_text:
        this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

    return self.func("ARRAY_TO_STRING", this, expression.expression)
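For example, BigQuery's three-argument ARRAY_TO_STRING (illustrative):

import sqlglot

# The null-replacement argument is emulated with LIST_TRANSFORM + COALESCE.
sql = "SELECT ARRAY_TO_STRING(arr, ',', 'N/A') FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# Expected shape:
# SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, 'N/A')), ',') FROM t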
@unsupported_args("position", "occurrence")
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    group = expression.args.get("group")
    params = expression.args.get("parameters")

    # Do not render group if there is no following argument,
    # and it's the default value for this dialect
    if (
        not params
        and group
        and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
    ):
        group = None
    return self.func(
        "REGEXP_EXTRACT", expression.this, expression.expression, group, params
    )
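The default-group elision on a DuckDB round trip (illustrative):

import sqlglot

# Group 0 is DuckDB's default, so the trailing argument is dropped.
print(sqlglot.transpile("SELECT REGEXP_EXTRACT(x, 'ab(c)', 0)", read="duckdb", write="duckdb")[0])
# Expected shape: SELECT REGEXP_EXTRACT(x, 'ab(c)')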
@unsupported_args("culture")
def numbertostr_sql(self, expression: exp.NumberToStr) -> str:
    fmt = expression.args.get("format")
    if fmt and fmt.is_int:
        return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this)

    self.unsupported("Only integer formats are supported by NumberToStr")
    return self.function_fallback_sql(expression)
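For example, MySQL's FORMAT(number, decimals) parses into exp.NumberToStr (a sketch; exact output may differ):

import sqlglot

# An integer precision maps onto a Python-style format string for DuckDB's FORMAT.
print(sqlglot.transpile("SELECT FORMAT(1234.5678, 2)", read="mysql", write="duckdb")[0])
# Expected shape: SELECT FORMAT('{:,.2f}', 1234.5678)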
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    this = expression.this
    parent = expression.parent

    # The default Spark aliases are "pos" and "col", unless specified otherwise
    pos, col = exp.to_identifier("pos"), exp.to_identifier("col")

    if isinstance(parent, exp.Aliases):
        # Column case: SELECT POSEXPLODE(col) [AS (a, b)]
        pos, col = parent.expressions
    elif isinstance(parent, exp.Table):
        # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        alias = parent.args.get("alias")
        if alias:
            pos, col = alias.columns or [pos, col]
            alias.pop()

    # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS
    # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS
    unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col))
    gen_subscripts = self.sql(
        exp.Alias(
            this=exp.Anonymous(
                this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)]
            )
            - exp.Literal.number(1),
            alias=pos,
        )
    )

    posexplode_sql = self.format_args(gen_subscripts, unnest_sql)

    if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)):
        # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql])))

    return posexplode_sql
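The table-valued case from Spark, end to end (illustrative; exact aliasing may differ):

import sqlglot

# POSEXPLODE becomes GENERATE_SUBSCRIPTS (shifted to be 0-indexed) plus UNNEST,
# wrapped in a subquery because it appears in the FROM clause.
sql = "SELECT * FROM POSEXPLODE(ARRAY(10, 20))"
print(sqlglot.transpile(sql, read="spark", write="duckdb")[0])
# Expected shape:
# SELECT * FROM (SELECT GENERATE_SUBSCRIPTS([10, 20], 1) - 1 AS pos, UNNEST([10, 20]) AS col)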
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql