sqlglot.dialects.duckdb
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6 7from sqlglot.expressions import DATA_TYPE 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 JSON_EXTRACT_TYPE, 11 NormalizationStrategy, 12 Version, 13 approx_count_distinct_sql, 14 arrow_json_extract_sql, 15 binary_from_function, 16 bool_xor_sql, 17 build_default_decimal_type, 18 count_if_to_sum, 19 date_trunc_to_time, 20 datestrtodate_sql, 21 no_datetime_sql, 22 encode_decode_sql, 23 build_formatted_time, 24 inline_array_unless_query, 25 no_comment_column_constraint_sql, 26 no_time_sql, 27 no_timestamp_sql, 28 pivot_column_names, 29 rename_func, 30 remove_from_array_using_filter, 31 strposition_sql, 32 str_to_time_sql, 33 timestamptrunc_sql, 34 timestrtotime_sql, 35 unit_to_var, 36 unit_to_str, 37 sha256_sql, 38 build_regexp_extract, 39 explode_to_unnest_sql, 40 no_make_interval_sql, 41 groupconcat_sql, 42) 43from sqlglot.generator import unsupported_args 44from sqlglot.helper import seq_get 45from sqlglot.tokens import TokenType 46from sqlglot.parser import binary_range_parser 47 48DATETIME_DELTA = t.Union[ 49 exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub 50] 51 52 53def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str: 54 this = expression.this 55 unit = unit_to_var(expression) 56 op = ( 57 "+" 58 if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd)) 59 else "-" 60 ) 61 62 to_type: t.Optional[DATA_TYPE] = None 63 if isinstance(expression, exp.TsOrDsAdd): 64 to_type = expression.return_type 65 elif this.is_string: 66 # Cast string literals (i.e function parameters) to the appropriate type for +/- interval to work 67 to_type = ( 68 exp.DataType.Type.DATETIME 69 if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub)) 70 else exp.DataType.Type.DATE 71 ) 72 73 this = exp.cast(this, to_type) if to_type else this 74 75 expr = expression.expression 76 interval = expr if isinstance(expr, exp.Interval) else exp.Interval(this=expr, unit=unit) 77 78 return f"{self.sql(this)} {op} {self.sql(interval)}" 79 80 81# BigQuery -> DuckDB conversion for the DATE function 82def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str: 83 result = f"CAST({self.sql(expression, 'this')} AS DATE)" 84 zone = self.sql(expression, "zone") 85 86 if zone: 87 date_str = self.func("STRFTIME", result, "'%d/%m/%Y'") 88 date_str = f"{date_str} || ' ' || {zone}" 89 90 # This will create a TIMESTAMP with time zone information 91 result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'") 92 93 return result 94 95 96# BigQuery -> DuckDB conversion for the TIME_DIFF function 97def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str: 98 this = exp.cast(expression.this, exp.DataType.Type.TIME) 99 expr = exp.cast(expression.expression, exp.DataType.Type.TIME) 100 101 # Although the 2 dialects share similar signatures, BQ seems to inverse 102 # the sign of the result so the start/end time operands are flipped 103 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 104 105 106@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator.")) 107def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str: 108 return self.func("ARRAY_SORT", expression.this) 109 110 111def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str: 112 name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT" 113 return self.func(name, expression.this) 114 115 116def _build_sort_array_desc(args: t.List) -> exp.Expression: 117 return exp.SortArray(this=seq_get(args, 0), asc=exp.false()) 118 119 120def _build_date_diff(args: t.List) -> exp.Expression: 121 return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)) 122 123 124def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]: 125 def _builder(args: t.List) -> exp.GenerateSeries: 126 # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions 127 if len(args) == 1: 128 # DuckDB uses 0 as a default for the series' start when it's omitted 129 args.insert(0, exp.Literal.number("0")) 130 131 gen_series = exp.GenerateSeries.from_arg_list(args) 132 gen_series.set("is_end_exclusive", end_exclusive) 133 134 return gen_series 135 136 return _builder 137 138 139def _build_make_timestamp(args: t.List) -> exp.Expression: 140 if len(args) == 1: 141 return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS) 142 143 return exp.TimestampFromParts( 144 year=seq_get(args, 0), 145 month=seq_get(args, 1), 146 day=seq_get(args, 2), 147 hour=seq_get(args, 3), 148 min=seq_get(args, 4), 149 sec=seq_get(args, 5), 150 ) 151 152 153def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[DuckDB.Parser], exp.Show]: 154 def _parse(self: DuckDB.Parser) -> exp.Show: 155 return self._parse_show_duckdb(*args, **kwargs) 156 157 return _parse 158 159 160def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str: 161 args: t.List[str] = [] 162 163 # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is 164 # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB 165 # The transformation to ROW will take place if: 166 # 1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would 167 # 2. A cast to STRUCT / ARRAY of STRUCTs is found 168 ancestor_cast = expression.find_ancestor(exp.Cast) 169 is_bq_inline_struct = ( 170 (expression.find(exp.PropertyEQ) is None) 171 and ancestor_cast 172 and any( 173 casted_type.is_type(exp.DataType.Type.STRUCT) 174 for casted_type in ancestor_cast.find_all(exp.DataType) 175 ) 176 ) 177 178 for i, expr in enumerate(expression.expressions): 179 is_property_eq = isinstance(expr, exp.PropertyEQ) 180 value = expr.expression if is_property_eq else expr 181 182 if is_bq_inline_struct: 183 args.append(self.sql(value)) 184 else: 185 if is_property_eq: 186 if isinstance(expr.this, exp.Identifier): 187 key = self.sql(exp.Literal.string(expr.name)) 188 else: 189 key = self.sql(expr.this) 190 else: 191 key = self.sql(exp.Literal.string(f"_{i}")) 192 193 args.append(f"{key}: {self.sql(value)}") 194 195 csv_args = ", ".join(args) 196 197 return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}" 198 199 200def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str: 201 if expression.is_type("array"): 202 return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]" 203 204 # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE 205 if expression.is_type( 206 exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ 207 ): 208 return expression.this.value 209 210 return self.datatype_sql(expression) 211 212 213def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str: 214 sql = self.func("TO_JSON", expression.this, expression.args.get("options")) 215 return f"CAST({sql} AS TEXT)" 216 217 218def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str: 219 scale = expression.args.get("scale") 220 timestamp = expression.this 221 222 if scale in (None, exp.UnixToTime.SECONDS): 223 return self.func("TO_TIMESTAMP", timestamp) 224 if scale == exp.UnixToTime.MILLIS: 225 return self.func("EPOCH_MS", timestamp) 226 if scale == exp.UnixToTime.MICROS: 227 return self.func("MAKE_TIMESTAMP", timestamp) 228 229 return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale))) 230 231 232WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In) 233 234 235def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str: 236 arrow_sql = arrow_json_extract_sql(self, expression) 237 if not expression.same_parent and isinstance( 238 expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS 239 ): 240 arrow_sql = self.wrap(arrow_sql) 241 return arrow_sql 242 243 244def _implicit_datetime_cast( 245 arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE 246) -> t.Optional[exp.Expression]: 247 return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg 248 249 250def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str: 251 this = _implicit_datetime_cast(expression.this) 252 expr = _implicit_datetime_cast(expression.expression) 253 254 return self.func("DATE_DIFF", unit_to_str(expression), expr, this) 255 256 257def _generate_datetime_array_sql( 258 self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray] 259) -> str: 260 is_generate_date_array = isinstance(expression, exp.GenerateDateArray) 261 262 type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP 263 start = _implicit_datetime_cast(expression.args.get("start"), type=type) 264 end = _implicit_datetime_cast(expression.args.get("end"), type=type) 265 266 # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB'S GENERATE_SERIES 267 gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries( 268 start=start, end=end, step=expression.args.get("step") 269 ) 270 271 if is_generate_date_array: 272 # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for 273 # GENERATE_DATE_ARRAY we must cast it back to DATE array 274 gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>")) 275 276 return self.sql(gen_series) 277 278 279def _json_extract_value_array_sql( 280 self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 281) -> str: 282 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 283 data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>" 284 return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type))) 285 286 287class DuckDB(Dialect): 288 NULL_ORDERING = "nulls_are_last" 289 SUPPORTS_USER_DEFINED_TYPES = True 290 SAFE_DIVISION = True 291 INDEX_OFFSET = 1 292 CONCAT_COALESCE = True 293 SUPPORTS_ORDER_BY_ALL = True 294 SUPPORTS_FIXED_SIZE_ARRAYS = True 295 STRICT_JSON_PATH_SYNTAX = False 296 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 297 298 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 299 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 300 301 DATE_PART_MAPPING = { 302 **Dialect.DATE_PART_MAPPING, 303 "DAYOFWEEKISO": "ISODOW", 304 } 305 DATE_PART_MAPPING.pop("WEEKDAY") 306 307 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 308 if isinstance(path, exp.Literal): 309 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 310 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 311 # This check ensures we'll avoid trying to parse these as JSON paths, which can 312 # either result in a noisy warning or in an invalid representation of the path. 313 path_text = path.name 314 if path_text.startswith("/") or "[#" in path_text: 315 return path 316 317 return super().to_json_path(path) 318 319 class Tokenizer(tokens.Tokenizer): 320 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 321 HEREDOC_STRINGS = ["$"] 322 323 HEREDOC_TAG_IS_IDENTIFIER = True 324 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 325 326 KEYWORDS = { 327 **tokens.Tokenizer.KEYWORDS, 328 "//": TokenType.DIV, 329 "**": TokenType.DSTAR, 330 "^@": TokenType.CARET_AT, 331 "@>": TokenType.AT_GT, 332 "<@": TokenType.LT_AT, 333 "ATTACH": TokenType.ATTACH, 334 "BINARY": TokenType.VARBINARY, 335 "BITSTRING": TokenType.BIT, 336 "BPCHAR": TokenType.TEXT, 337 "CHAR": TokenType.TEXT, 338 "DATETIME": TokenType.TIMESTAMPNTZ, 339 "DETACH": TokenType.DETACH, 340 "EXCLUDE": TokenType.EXCEPT, 341 "LOGICAL": TokenType.BOOLEAN, 342 "ONLY": TokenType.ONLY, 343 "PIVOT_WIDER": TokenType.PIVOT, 344 "POSITIONAL": TokenType.POSITIONAL, 345 "RESET": TokenType.COMMAND, 346 "SIGNED": TokenType.INT, 347 "STRING": TokenType.TEXT, 348 "SUMMARIZE": TokenType.SUMMARIZE, 349 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 350 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 351 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 352 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 353 "TIMESTAMP_US": TokenType.TIMESTAMP, 354 "UBIGINT": TokenType.UBIGINT, 355 "UINTEGER": TokenType.UINT, 356 "USMALLINT": TokenType.USMALLINT, 357 "UTINYINT": TokenType.UTINYINT, 358 "VARCHAR": TokenType.TEXT, 359 } 360 KEYWORDS.pop("/*+") 361 362 SINGLE_TOKENS = { 363 **tokens.Tokenizer.SINGLE_TOKENS, 364 "$": TokenType.PARAMETER, 365 } 366 367 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 368 369 class Parser(parser.Parser): 370 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 371 372 BITWISE = { 373 **parser.Parser.BITWISE, 374 TokenType.TILDA: exp.RegexpLike, 375 } 376 BITWISE.pop(TokenType.CARET) 377 378 RANGE_PARSERS = { 379 **parser.Parser.RANGE_PARSERS, 380 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 381 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 382 } 383 384 EXPONENT = { 385 **parser.Parser.EXPONENT, 386 TokenType.CARET: exp.Pow, 387 TokenType.DSTAR: exp.Pow, 388 } 389 390 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 391 392 SHOW_PARSERS = { 393 "TABLES": _show_parser("TABLES"), 394 "ALL TABLES": _show_parser("ALL TABLES"), 395 } 396 397 FUNCTIONS = { 398 **parser.Parser.FUNCTIONS, 399 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 400 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 401 "ARRAY_SORT": exp.SortArray.from_arg_list, 402 "DATEDIFF": _build_date_diff, 403 "DATE_DIFF": _build_date_diff, 404 "DATE_TRUNC": date_trunc_to_time, 405 "DATETRUNC": date_trunc_to_time, 406 "DECODE": lambda args: exp.Decode( 407 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 408 ), 409 "EDITDIST3": exp.Levenshtein.from_arg_list, 410 "ENCODE": lambda args: exp.Encode( 411 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 412 ), 413 "EPOCH": exp.TimeToUnix.from_arg_list, 414 "EPOCH_MS": lambda args: exp.UnixToTime( 415 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 416 ), 417 "GENERATE_SERIES": _build_generate_series(), 418 "JSON": exp.ParseJSON.from_arg_list, 419 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 420 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 421 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 422 "LIST_HAS": exp.ArrayContains.from_arg_list, 423 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 424 "LIST_REVERSE_SORT": _build_sort_array_desc, 425 "LIST_SORT": exp.SortArray.from_arg_list, 426 "LIST_VALUE": lambda args: exp.Array(expressions=args), 427 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 428 "MAKE_TIMESTAMP": _build_make_timestamp, 429 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 430 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 431 "RANGE": _build_generate_series(end_exclusive=True), 432 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 433 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 434 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 435 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 436 this=seq_get(args, 0), 437 expression=seq_get(args, 1), 438 replacement=seq_get(args, 2), 439 modifiers=seq_get(args, 3), 440 ), 441 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 442 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 443 "STRING_SPLIT": exp.Split.from_arg_list, 444 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 445 "STRING_TO_ARRAY": exp.Split.from_arg_list, 446 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 447 "STRUCT_PACK": exp.Struct.from_arg_list, 448 "STR_SPLIT": exp.Split.from_arg_list, 449 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 450 "TIME_BUCKET": exp.DateBin.from_arg_list, 451 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 452 "UNNEST": exp.Explode.from_arg_list, 453 "XOR": binary_from_function(exp.BitwiseXor), 454 } 455 456 FUNCTIONS.pop("DATE_SUB") 457 FUNCTIONS.pop("GLOB") 458 459 FUNCTION_PARSERS = { 460 **parser.Parser.FUNCTION_PARSERS, 461 **dict.fromkeys( 462 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 463 ), 464 } 465 FUNCTION_PARSERS.pop("DECODE") 466 467 NO_PAREN_FUNCTION_PARSERS = { 468 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 469 "MAP": lambda self: self._parse_map(), 470 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 471 } 472 473 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 474 TokenType.SEMI, 475 TokenType.ANTI, 476 } 477 478 PLACEHOLDER_PARSERS = { 479 **parser.Parser.PLACEHOLDER_PARSERS, 480 TokenType.PARAMETER: lambda self: ( 481 self.expression(exp.Placeholder, this=self._prev.text) 482 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 483 else None 484 ), 485 } 486 487 TYPE_CONVERTERS = { 488 # https://duckdb.org/docs/sql/data_types/numeric 489 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 490 # https://duckdb.org/docs/sql/data_types/text 491 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 492 } 493 494 STATEMENT_PARSERS = { 495 **parser.Parser.STATEMENT_PARSERS, 496 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 497 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 498 TokenType.SHOW: lambda self: self._parse_show(), 499 } 500 501 SET_PARSERS = { 502 **parser.Parser.SET_PARSERS, 503 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 504 } 505 506 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 507 index = self._index 508 if not self._match_text_seq("LAMBDA"): 509 return super()._parse_lambda(alias=alias) 510 511 expressions = self._parse_csv(self._parse_lambda_arg) 512 if not self._match(TokenType.COLON): 513 self._retreat(index) 514 return None 515 516 this = self._replace_lambda(self._parse_assignment(), expressions) 517 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 518 519 def _parse_expression(self) -> t.Optional[exp.Expression]: 520 # DuckDB supports prefix aliases, e.g. foo: 1 521 if self._next and self._next.token_type == TokenType.COLON: 522 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 523 self._match(TokenType.COLON) 524 comments = self._prev_comments or [] 525 526 this = self._parse_assignment() 527 if isinstance(this, exp.Expression): 528 # Moves the comment next to the alias in `alias: expr /* comment */` 529 comments += this.pop_comments() or [] 530 531 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 532 533 return super()._parse_expression() 534 535 def _parse_table( 536 self, 537 schema: bool = False, 538 joins: bool = False, 539 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 540 parse_bracket: bool = False, 541 is_db_reference: bool = False, 542 parse_partition: bool = False, 543 consume_pipe: bool = False, 544 ) -> t.Optional[exp.Expression]: 545 # DuckDB supports prefix aliases, e.g. FROM foo: bar 546 if self._next and self._next.token_type == TokenType.COLON: 547 alias = self._parse_table_alias( 548 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 549 ) 550 self._match(TokenType.COLON) 551 comments = self._prev_comments or [] 552 else: 553 alias = None 554 comments = [] 555 556 table = super()._parse_table( 557 schema=schema, 558 joins=joins, 559 alias_tokens=alias_tokens, 560 parse_bracket=parse_bracket, 561 is_db_reference=is_db_reference, 562 parse_partition=parse_partition, 563 ) 564 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 565 # Moves the comment next to the alias in `alias: table /* comment */` 566 comments += table.pop_comments() or [] 567 alias.comments = alias.pop_comments() + comments 568 table.set("alias", alias) 569 570 return table 571 572 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 573 # https://duckdb.org/docs/sql/samples.html 574 sample = super()._parse_table_sample(as_modifier=as_modifier) 575 if sample and not sample.args.get("method"): 576 if sample.args.get("size"): 577 sample.set("method", exp.var("RESERVOIR")) 578 else: 579 sample.set("method", exp.var("SYSTEM")) 580 581 return sample 582 583 def _parse_bracket( 584 self, this: t.Optional[exp.Expression] = None 585 ) -> t.Optional[exp.Expression]: 586 bracket = super()._parse_bracket(this) 587 588 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 589 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 590 bracket.set("returns_list_for_maps", True) 591 592 return bracket 593 594 def _parse_map(self) -> exp.ToMap | exp.Map: 595 if self._match(TokenType.L_BRACE, advance=False): 596 return self.expression(exp.ToMap, this=self._parse_bracket()) 597 598 args = self._parse_wrapped_csv(self._parse_assignment) 599 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 600 601 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 602 return self._parse_field_def() 603 604 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 605 if len(aggregations) == 1: 606 return super()._pivot_column_names(aggregations) 607 return pivot_column_names(aggregations, dialect="duckdb") 608 609 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 610 def _parse_attach_option() -> exp.AttachOption: 611 return self.expression( 612 exp.AttachOption, 613 this=self._parse_var(any_token=True), 614 expression=self._parse_field(any_token=True), 615 ) 616 617 self._match(TokenType.DATABASE) 618 exists = self._parse_exists(not_=is_attach) 619 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 620 621 if self._match(TokenType.L_PAREN, advance=False): 622 expressions = self._parse_wrapped_csv(_parse_attach_option) 623 else: 624 expressions = None 625 626 return ( 627 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 628 if is_attach 629 else self.expression(exp.Detach, this=this, exists=exists) 630 ) 631 632 def _parse_show_duckdb(self, this: str) -> exp.Show: 633 return self.expression(exp.Show, this=this) 634 635 def _parse_primary(self) -> t.Optional[exp.Expression]: 636 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 637 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 638 639 return super()._parse_primary() 640 641 class Generator(generator.Generator): 642 PARAMETER_TOKEN = "$" 643 NAMED_PLACEHOLDER_TOKEN = "$" 644 JOIN_HINTS = False 645 TABLE_HINTS = False 646 QUERY_HINTS = False 647 LIMIT_FETCH = "LIMIT" 648 STRUCT_DELIMITER = ("(", ")") 649 RENAME_TABLE_WITH_DB = False 650 NVL2_SUPPORTED = False 651 SEMI_ANTI_JOIN_WITH_SIDE = False 652 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 653 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 654 LAST_DAY_SUPPORTS_DATE_PART = False 655 JSON_KEY_VALUE_PAIR_SEP = "," 656 IGNORE_NULLS_IN_FUNC = True 657 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 658 SUPPORTS_CREATE_TABLE_LIKE = False 659 MULTI_ARG_DISTINCT = False 660 CAN_IMPLEMENT_ARRAY_ANY = True 661 SUPPORTS_TO_NUMBER = False 662 SUPPORTS_WINDOW_EXCLUDE = True 663 COPY_HAS_INTO_KEYWORD = False 664 STAR_EXCEPT = "EXCLUDE" 665 PAD_FILL_PATTERN_IS_REQUIRED = True 666 ARRAY_CONCAT_IS_VAR_LEN = False 667 ARRAY_SIZE_DIM_REQUIRED = False 668 NORMALIZE_EXTRACT_DATE_PARTS = True 669 SUPPORTS_LIKE_QUANTIFIERS = False 670 671 TRANSFORMS = { 672 **generator.Generator.TRANSFORMS, 673 exp.ApproxDistinct: approx_count_distinct_sql, 674 exp.Array: inline_array_unless_query, 675 exp.ArrayFilter: rename_func("LIST_FILTER"), 676 exp.ArrayRemove: remove_from_array_using_filter, 677 exp.ArraySort: _array_sort_sql, 678 exp.ArraySum: rename_func("LIST_SUM"), 679 exp.BitwiseXor: rename_func("XOR"), 680 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 681 exp.CurrentDate: lambda *_: "CURRENT_DATE", 682 exp.CurrentTime: lambda *_: "CURRENT_TIME", 683 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 684 exp.DayOfMonth: rename_func("DAYOFMONTH"), 685 exp.DayOfWeek: rename_func("DAYOFWEEK"), 686 exp.DayOfWeekIso: rename_func("ISODOW"), 687 exp.DayOfYear: rename_func("DAYOFYEAR"), 688 exp.DataType: _datatype_sql, 689 exp.Date: _date_sql, 690 exp.DateAdd: _date_delta_sql, 691 exp.DateFromParts: rename_func("MAKE_DATE"), 692 exp.DateSub: _date_delta_sql, 693 exp.DateDiff: _date_diff_sql, 694 exp.DateStrToDate: datestrtodate_sql, 695 exp.Datetime: no_datetime_sql, 696 exp.DatetimeSub: _date_delta_sql, 697 exp.DatetimeAdd: _date_delta_sql, 698 exp.DateToDi: lambda self, 699 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 700 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 701 exp.DiToDate: lambda self, 702 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 703 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 704 exp.GenerateDateArray: _generate_datetime_array_sql, 705 exp.GenerateTimestampArray: _generate_datetime_array_sql, 706 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 707 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 708 exp.Explode: rename_func("UNNEST"), 709 exp.IntDiv: lambda self, e: self.binary(e, "//"), 710 exp.IsInf: rename_func("ISINF"), 711 exp.IsNan: rename_func("ISNAN"), 712 exp.JSONBExists: rename_func("JSON_EXISTS"), 713 exp.JSONExtract: _arrow_json_extract_sql, 714 exp.JSONExtractArray: _json_extract_value_array_sql, 715 exp.JSONExtractScalar: _arrow_json_extract_sql, 716 exp.JSONFormat: _json_format_sql, 717 exp.JSONValueArray: _json_extract_value_array_sql, 718 exp.Lateral: explode_to_unnest_sql, 719 exp.LogicalOr: rename_func("BOOL_OR"), 720 exp.LogicalAnd: rename_func("BOOL_AND"), 721 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 722 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 723 exp.MonthsBetween: lambda self, e: self.func( 724 "DATEDIFF", 725 "'month'", 726 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 727 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 728 ), 729 exp.PercentileCont: rename_func("QUANTILE_CONT"), 730 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 731 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 732 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 733 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 734 exp.RegexpReplace: lambda self, e: self.func( 735 "REGEXP_REPLACE", 736 e.this, 737 e.expression, 738 e.args.get("replacement"), 739 e.args.get("modifiers"), 740 ), 741 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 742 exp.RegexpILike: lambda self, e: self.func( 743 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 744 ), 745 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 746 exp.Return: lambda self, e: self.sql(e, "this"), 747 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 748 exp.Rand: rename_func("RANDOM"), 749 exp.SHA: rename_func("SHA1"), 750 exp.SHA2: sha256_sql, 751 exp.Split: rename_func("STR_SPLIT"), 752 exp.SortArray: _sort_array_sql, 753 exp.StrPosition: strposition_sql, 754 exp.StrToUnix: lambda self, e: self.func( 755 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 756 ), 757 exp.Struct: _struct_sql, 758 exp.Transform: rename_func("LIST_TRANSFORM"), 759 exp.TimeAdd: _date_delta_sql, 760 exp.Time: no_time_sql, 761 exp.TimeDiff: _timediff_sql, 762 exp.Timestamp: no_timestamp_sql, 763 exp.TimestampDiff: lambda self, e: self.func( 764 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 765 ), 766 exp.TimestampTrunc: timestamptrunc_sql(), 767 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 768 exp.TimeStrToTime: timestrtotime_sql, 769 exp.TimeStrToUnix: lambda self, e: self.func( 770 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 771 ), 772 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 773 exp.TimeToUnix: rename_func("EPOCH"), 774 exp.TsOrDiToDi: lambda self, 775 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 776 exp.TsOrDsAdd: _date_delta_sql, 777 exp.TsOrDsDiff: lambda self, e: self.func( 778 "DATE_DIFF", 779 f"'{e.args.get('unit') or 'DAY'}'", 780 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 781 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 782 ), 783 exp.UnixToStr: lambda self, e: self.func( 784 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 785 ), 786 exp.DatetimeTrunc: lambda self, e: self.func( 787 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 788 ), 789 exp.UnixToTime: _unix_to_time_sql, 790 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 791 exp.VariancePop: rename_func("VAR_POP"), 792 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 793 exp.Xor: bool_xor_sql, 794 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 795 rename_func("LEVENSHTEIN") 796 ), 797 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 798 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 799 exp.DateBin: rename_func("TIME_BUCKET"), 800 } 801 802 SUPPORTED_JSON_PATH_PARTS = { 803 exp.JSONPathKey, 804 exp.JSONPathRoot, 805 exp.JSONPathSubscript, 806 exp.JSONPathWildcard, 807 } 808 809 TYPE_MAPPING = { 810 **generator.Generator.TYPE_MAPPING, 811 exp.DataType.Type.BINARY: "BLOB", 812 exp.DataType.Type.BPCHAR: "TEXT", 813 exp.DataType.Type.CHAR: "TEXT", 814 exp.DataType.Type.DATETIME: "TIMESTAMP", 815 exp.DataType.Type.FLOAT: "REAL", 816 exp.DataType.Type.JSONB: "JSON", 817 exp.DataType.Type.NCHAR: "TEXT", 818 exp.DataType.Type.NVARCHAR: "TEXT", 819 exp.DataType.Type.UINT: "UINTEGER", 820 exp.DataType.Type.VARBINARY: "BLOB", 821 exp.DataType.Type.ROWVERSION: "BLOB", 822 exp.DataType.Type.VARCHAR: "TEXT", 823 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 824 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 825 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 826 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 827 } 828 829 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 830 RESERVED_KEYWORDS = { 831 "array", 832 "analyse", 833 "union", 834 "all", 835 "when", 836 "in_p", 837 "default", 838 "create_p", 839 "window", 840 "asymmetric", 841 "to", 842 "else", 843 "localtime", 844 "from", 845 "end_p", 846 "select", 847 "current_date", 848 "foreign", 849 "with", 850 "grant", 851 "session_user", 852 "or", 853 "except", 854 "references", 855 "fetch", 856 "limit", 857 "group_p", 858 "leading", 859 "into", 860 "collate", 861 "offset", 862 "do", 863 "then", 864 "localtimestamp", 865 "check_p", 866 "lateral_p", 867 "current_role", 868 "where", 869 "asc_p", 870 "placing", 871 "desc_p", 872 "user", 873 "unique", 874 "initially", 875 "column", 876 "both", 877 "some", 878 "as", 879 "any", 880 "only", 881 "deferrable", 882 "null_p", 883 "current_time", 884 "true_p", 885 "table", 886 "case", 887 "trailing", 888 "variadic", 889 "for", 890 "on", 891 "distinct", 892 "false_p", 893 "not", 894 "constraint", 895 "current_timestamp", 896 "returning", 897 "primary", 898 "intersect", 899 "having", 900 "analyze", 901 "current_user", 902 "and", 903 "cast", 904 "symmetric", 905 "using", 906 "order", 907 "current_catalog", 908 } 909 910 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 911 912 # DuckDB doesn't generally support CREATE TABLE .. properties 913 # https://duckdb.org/docs/sql/statements/create_table.html 914 PROPERTIES_LOCATION = { 915 prop: exp.Properties.Location.UNSUPPORTED 916 for prop in generator.Generator.PROPERTIES_LOCATION 917 } 918 919 # There are a few exceptions (e.g. temporary tables) which are supported or 920 # can be transpiled to DuckDB, so we explicitly override them accordingly 921 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 922 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 923 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 924 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 925 926 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 927 exp.FirstValue, 928 exp.Lag, 929 exp.LastValue, 930 exp.Lead, 931 exp.NthValue, 932 ) 933 934 def lambda_sql( 935 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 936 ) -> str: 937 if expression.args.get("colon"): 938 prefix = "LAMBDA " 939 arrow_sep = ":" 940 wrap = False 941 else: 942 prefix = "" 943 944 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 945 return f"{prefix}{lambda_sql}" 946 947 def show_sql(self, expression: exp.Show) -> str: 948 return f"SHOW {expression.name}" 949 950 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 951 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 952 953 def strtotime_sql(self, expression: exp.StrToTime) -> str: 954 if expression.args.get("safe"): 955 formatted_time = self.format_time(expression) 956 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 957 return str_to_time_sql(self, expression) 958 959 def strtodate_sql(self, expression: exp.StrToDate) -> str: 960 if expression.args.get("safe"): 961 formatted_time = self.format_time(expression) 962 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 963 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 964 965 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 966 arg = expression.this 967 if expression.args.get("safe"): 968 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 969 return self.func("JSON", arg) 970 971 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 972 nano = expression.args.get("nano") 973 if nano is not None: 974 expression.set( 975 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 976 ) 977 978 return rename_func("MAKE_TIME")(self, expression) 979 980 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 981 sec = expression.args["sec"] 982 983 milli = expression.args.get("milli") 984 if milli is not None: 985 sec += milli.pop() / exp.Literal.number(1000.0) 986 987 nano = expression.args.get("nano") 988 if nano is not None: 989 sec += nano.pop() / exp.Literal.number(1000000000.0) 990 991 if milli or nano: 992 expression.set("sec", sec) 993 994 return rename_func("MAKE_TIMESTAMP")(self, expression) 995 996 def tablesample_sql( 997 self, 998 expression: exp.TableSample, 999 tablesample_keyword: t.Optional[str] = None, 1000 ) -> str: 1001 if not isinstance(expression.parent, exp.Select): 1002 # This sample clause only applies to a single source, not the entire resulting relation 1003 tablesample_keyword = "TABLESAMPLE" 1004 1005 if expression.args.get("size"): 1006 method = expression.args.get("method") 1007 if method and method.name.upper() != "RESERVOIR": 1008 self.unsupported( 1009 f"Sampling method {method} is not supported with a discrete sample count, " 1010 "defaulting to reservoir sampling" 1011 ) 1012 expression.set("method", exp.var("RESERVOIR")) 1013 1014 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1015 1016 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1017 if isinstance(expression.parent, exp.UserDefinedFunction): 1018 return self.sql(expression, "this") 1019 return super().columndef_sql(expression, sep) 1020 1021 def join_sql(self, expression: exp.Join) -> str: 1022 if ( 1023 expression.side == "LEFT" 1024 and not expression.args.get("on") 1025 and isinstance(expression.this, exp.Unnest) 1026 ): 1027 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1028 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1029 return super().join_sql(expression.on(exp.true())) 1030 1031 return super().join_sql(expression) 1032 1033 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1034 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1035 if expression.args.get("is_end_exclusive"): 1036 return rename_func("RANGE")(self, expression) 1037 1038 return self.function_fallback_sql(expression) 1039 1040 def countif_sql(self, expression: exp.CountIf) -> str: 1041 if self.dialect.version >= Version("1.2"): 1042 return self.function_fallback_sql(expression) 1043 1044 # https://github.com/tobymao/sqlglot/pull/4749 1045 return count_if_to_sum(self, expression) 1046 1047 def bracket_sql(self, expression: exp.Bracket) -> str: 1048 if self.dialect.version >= Version("1.2"): 1049 return super().bracket_sql(expression) 1050 1051 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1052 this = expression.this 1053 if isinstance(this, exp.Array): 1054 this.replace(exp.paren(this)) 1055 1056 bracket = super().bracket_sql(expression) 1057 1058 if not expression.args.get("returns_list_for_maps"): 1059 if not this.type: 1060 from sqlglot.optimizer.annotate_types import annotate_types 1061 1062 this = annotate_types(this, dialect=self.dialect) 1063 1064 if this.is_type(exp.DataType.Type.MAP): 1065 bracket = f"({bracket})[1]" 1066 1067 return bracket 1068 1069 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1070 expression_sql = self.sql(expression, "expression") 1071 1072 func = expression.this 1073 if isinstance(func, exp.PERCENTILES): 1074 # Make the order key the first arg and slide the fraction to the right 1075 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1076 order_col = expression.find(exp.Ordered) 1077 if order_col: 1078 func.set("expression", func.this) 1079 func.set("this", order_col.this) 1080 1081 this = self.sql(expression, "this").rstrip(")") 1082 1083 return f"{this}{expression_sql})" 1084 1085 def length_sql(self, expression: exp.Length) -> str: 1086 arg = expression.this 1087 1088 # Dialects like BQ and Snowflake also accept binary values as args, so 1089 # DDB will attempt to infer the type or resort to case/when resolution 1090 if not expression.args.get("binary") or arg.is_string: 1091 return self.func("LENGTH", arg) 1092 1093 if not arg.type: 1094 from sqlglot.optimizer.annotate_types import annotate_types 1095 1096 arg = annotate_types(arg, dialect=self.dialect) 1097 1098 if arg.is_type(*exp.DataType.TEXT_TYPES): 1099 return self.func("LENGTH", arg) 1100 1101 # We need these casts to make duckdb's static type checker happy 1102 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1103 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1104 1105 case = ( 1106 exp.case(self.func("TYPEOF", arg)) 1107 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1108 .else_( 1109 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1110 ) # anonymous to break length_sql recursion 1111 ) 1112 1113 return self.sql(case) 1114 1115 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1116 this = expression.this 1117 key = expression.args.get("key") 1118 key_sql = key.name if isinstance(key, exp.Expression) else "" 1119 value_sql = self.sql(expression, "value") 1120 1121 kv_sql = f"{key_sql} := {value_sql}" 1122 1123 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1124 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1125 if isinstance(this, exp.Struct) and not this.expressions: 1126 return self.func("STRUCT_PACK", kv_sql) 1127 1128 return self.func("STRUCT_INSERT", this, kv_sql) 1129 1130 def unnest_sql(self, expression: exp.Unnest) -> str: 1131 explode_array = expression.args.get("explode_array") 1132 if explode_array: 1133 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1134 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1135 expression.expressions.append( 1136 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1137 ) 1138 1139 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1140 alias = expression.args.get("alias") 1141 if isinstance(alias, exp.TableAlias): 1142 expression.set("alias", None) 1143 if alias.columns: 1144 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1145 1146 unnest_sql = super().unnest_sql(expression) 1147 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1148 return self.sql(select) 1149 1150 return super().unnest_sql(expression) 1151 1152 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1153 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1154 # DuckDB should render IGNORE NULLS only for the general-purpose 1155 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1156 return super().ignorenulls_sql(expression) 1157 1158 if not isinstance(expression.this, exp.AnyValue): 1159 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1160 1161 return self.sql(expression, "this") 1162 1163 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1164 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1165 # DuckDB should render RESPECT NULLS only for the general-purpose 1166 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1167 return super().respectnulls_sql(expression) 1168 1169 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1170 return self.sql(expression, "this") 1171 1172 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1173 this = self.sql(expression, "this") 1174 null_text = self.sql(expression, "null") 1175 1176 if null_text: 1177 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1178 1179 return self.func("ARRAY_TO_STRING", this, expression.expression) 1180 1181 @unsupported_args("position", "occurrence") 1182 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1183 group = expression.args.get("group") 1184 params = expression.args.get("parameters") 1185 1186 # Do not render group if there is no following argument, 1187 # and it's the default value for this dialect 1188 if ( 1189 not params 1190 and group 1191 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1192 ): 1193 group = None 1194 return self.func( 1195 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1196 ) 1197 1198 @unsupported_args("culture") 1199 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1200 fmt = expression.args.get("format") 1201 if fmt and fmt.is_int: 1202 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1203 1204 self.unsupported("Only integer formats are supported by NumberToStr") 1205 return self.function_fallback_sql(expression) 1206 1207 def autoincrementcolumnconstraint_sql(self, _) -> str: 1208 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1209 return "" 1210 1211 def aliases_sql(self, expression: exp.Aliases) -> str: 1212 this = expression.this 1213 if isinstance(this, exp.Posexplode): 1214 return self.posexplode_sql(this) 1215 1216 return super().aliases_sql(expression) 1217 1218 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1219 this = expression.this 1220 parent = expression.parent 1221 1222 # The default Spark aliases are "pos" and "col", unless specified otherwise 1223 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1224 1225 if isinstance(parent, exp.Aliases): 1226 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1227 pos, col = parent.expressions 1228 elif isinstance(parent, exp.Table): 1229 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1230 alias = parent.args.get("alias") 1231 if alias: 1232 pos, col = alias.columns or [pos, col] 1233 alias.pop() 1234 1235 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1236 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1237 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1238 gen_subscripts = self.sql( 1239 exp.Alias( 1240 this=exp.Anonymous( 1241 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1242 ) 1243 - exp.Literal.number(1), 1244 alias=pos, 1245 ) 1246 ) 1247 1248 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1249 1250 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1251 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1252 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1253 1254 return posexplode_sql 1255 1256 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1257 this = expression.this 1258 1259 if not this.type: 1260 from sqlglot.optimizer.annotate_types import annotate_types 1261 1262 this = annotate_types(this, dialect=self.dialect) 1263 1264 if this.is_type(*exp.DataType.TEXT_TYPES): 1265 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1266 1267 func = self.func( 1268 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1269 ) 1270 1271 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1272 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1273 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1274 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1275 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1276 return self.sql(exp.Cast(this=func, to=this.type)) 1277 1278 return self.sql(func)
288class DuckDB(Dialect): 289 NULL_ORDERING = "nulls_are_last" 290 SUPPORTS_USER_DEFINED_TYPES = True 291 SAFE_DIVISION = True 292 INDEX_OFFSET = 1 293 CONCAT_COALESCE = True 294 SUPPORTS_ORDER_BY_ALL = True 295 SUPPORTS_FIXED_SIZE_ARRAYS = True 296 STRICT_JSON_PATH_SYNTAX = False 297 NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True 298 299 # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table 300 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 301 302 DATE_PART_MAPPING = { 303 **Dialect.DATE_PART_MAPPING, 304 "DAYOFWEEKISO": "ISODOW", 305 } 306 DATE_PART_MAPPING.pop("WEEKDAY") 307 308 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 309 if isinstance(path, exp.Literal): 310 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 311 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 312 # This check ensures we'll avoid trying to parse these as JSON paths, which can 313 # either result in a noisy warning or in an invalid representation of the path. 314 path_text = path.name 315 if path_text.startswith("/") or "[#" in path_text: 316 return path 317 318 return super().to_json_path(path) 319 320 class Tokenizer(tokens.Tokenizer): 321 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 322 HEREDOC_STRINGS = ["$"] 323 324 HEREDOC_TAG_IS_IDENTIFIER = True 325 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 326 327 KEYWORDS = { 328 **tokens.Tokenizer.KEYWORDS, 329 "//": TokenType.DIV, 330 "**": TokenType.DSTAR, 331 "^@": TokenType.CARET_AT, 332 "@>": TokenType.AT_GT, 333 "<@": TokenType.LT_AT, 334 "ATTACH": TokenType.ATTACH, 335 "BINARY": TokenType.VARBINARY, 336 "BITSTRING": TokenType.BIT, 337 "BPCHAR": TokenType.TEXT, 338 "CHAR": TokenType.TEXT, 339 "DATETIME": TokenType.TIMESTAMPNTZ, 340 "DETACH": TokenType.DETACH, 341 "EXCLUDE": TokenType.EXCEPT, 342 "LOGICAL": TokenType.BOOLEAN, 343 "ONLY": TokenType.ONLY, 344 "PIVOT_WIDER": TokenType.PIVOT, 345 "POSITIONAL": TokenType.POSITIONAL, 346 "RESET": TokenType.COMMAND, 347 "SIGNED": TokenType.INT, 348 "STRING": TokenType.TEXT, 349 "SUMMARIZE": TokenType.SUMMARIZE, 350 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 351 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 352 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 353 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 354 "TIMESTAMP_US": TokenType.TIMESTAMP, 355 "UBIGINT": TokenType.UBIGINT, 356 "UINTEGER": TokenType.UINT, 357 "USMALLINT": TokenType.USMALLINT, 358 "UTINYINT": TokenType.UTINYINT, 359 "VARCHAR": TokenType.TEXT, 360 } 361 KEYWORDS.pop("/*+") 362 363 SINGLE_TOKENS = { 364 **tokens.Tokenizer.SINGLE_TOKENS, 365 "$": TokenType.PARAMETER, 366 } 367 368 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 369 370 class Parser(parser.Parser): 371 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 372 373 BITWISE = { 374 **parser.Parser.BITWISE, 375 TokenType.TILDA: exp.RegexpLike, 376 } 377 BITWISE.pop(TokenType.CARET) 378 379 RANGE_PARSERS = { 380 **parser.Parser.RANGE_PARSERS, 381 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 382 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 383 } 384 385 EXPONENT = { 386 **parser.Parser.EXPONENT, 387 TokenType.CARET: exp.Pow, 388 TokenType.DSTAR: exp.Pow, 389 } 390 391 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 392 393 SHOW_PARSERS = { 394 "TABLES": _show_parser("TABLES"), 395 "ALL TABLES": _show_parser("ALL TABLES"), 396 } 397 398 FUNCTIONS = { 399 **parser.Parser.FUNCTIONS, 400 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 401 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 402 "ARRAY_SORT": exp.SortArray.from_arg_list, 403 "DATEDIFF": _build_date_diff, 404 "DATE_DIFF": _build_date_diff, 405 "DATE_TRUNC": date_trunc_to_time, 406 "DATETRUNC": date_trunc_to_time, 407 "DECODE": lambda args: exp.Decode( 408 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 409 ), 410 "EDITDIST3": exp.Levenshtein.from_arg_list, 411 "ENCODE": lambda args: exp.Encode( 412 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 413 ), 414 "EPOCH": exp.TimeToUnix.from_arg_list, 415 "EPOCH_MS": lambda args: exp.UnixToTime( 416 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 417 ), 418 "GENERATE_SERIES": _build_generate_series(), 419 "JSON": exp.ParseJSON.from_arg_list, 420 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 421 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 422 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 423 "LIST_HAS": exp.ArrayContains.from_arg_list, 424 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 425 "LIST_REVERSE_SORT": _build_sort_array_desc, 426 "LIST_SORT": exp.SortArray.from_arg_list, 427 "LIST_VALUE": lambda args: exp.Array(expressions=args), 428 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 429 "MAKE_TIMESTAMP": _build_make_timestamp, 430 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 431 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 432 "RANGE": _build_generate_series(end_exclusive=True), 433 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 434 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 435 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 436 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 437 this=seq_get(args, 0), 438 expression=seq_get(args, 1), 439 replacement=seq_get(args, 2), 440 modifiers=seq_get(args, 3), 441 ), 442 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 443 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 444 "STRING_SPLIT": exp.Split.from_arg_list, 445 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 446 "STRING_TO_ARRAY": exp.Split.from_arg_list, 447 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 448 "STRUCT_PACK": exp.Struct.from_arg_list, 449 "STR_SPLIT": exp.Split.from_arg_list, 450 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 451 "TIME_BUCKET": exp.DateBin.from_arg_list, 452 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 453 "UNNEST": exp.Explode.from_arg_list, 454 "XOR": binary_from_function(exp.BitwiseXor), 455 } 456 457 FUNCTIONS.pop("DATE_SUB") 458 FUNCTIONS.pop("GLOB") 459 460 FUNCTION_PARSERS = { 461 **parser.Parser.FUNCTION_PARSERS, 462 **dict.fromkeys( 463 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 464 ), 465 } 466 FUNCTION_PARSERS.pop("DECODE") 467 468 NO_PAREN_FUNCTION_PARSERS = { 469 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 470 "MAP": lambda self: self._parse_map(), 471 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 472 } 473 474 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 475 TokenType.SEMI, 476 TokenType.ANTI, 477 } 478 479 PLACEHOLDER_PARSERS = { 480 **parser.Parser.PLACEHOLDER_PARSERS, 481 TokenType.PARAMETER: lambda self: ( 482 self.expression(exp.Placeholder, this=self._prev.text) 483 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 484 else None 485 ), 486 } 487 488 TYPE_CONVERTERS = { 489 # https://duckdb.org/docs/sql/data_types/numeric 490 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 491 # https://duckdb.org/docs/sql/data_types/text 492 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 493 } 494 495 STATEMENT_PARSERS = { 496 **parser.Parser.STATEMENT_PARSERS, 497 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 498 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 499 TokenType.SHOW: lambda self: self._parse_show(), 500 } 501 502 SET_PARSERS = { 503 **parser.Parser.SET_PARSERS, 504 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 505 } 506 507 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 508 index = self._index 509 if not self._match_text_seq("LAMBDA"): 510 return super()._parse_lambda(alias=alias) 511 512 expressions = self._parse_csv(self._parse_lambda_arg) 513 if not self._match(TokenType.COLON): 514 self._retreat(index) 515 return None 516 517 this = self._replace_lambda(self._parse_assignment(), expressions) 518 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 519 520 def _parse_expression(self) -> t.Optional[exp.Expression]: 521 # DuckDB supports prefix aliases, e.g. foo: 1 522 if self._next and self._next.token_type == TokenType.COLON: 523 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 524 self._match(TokenType.COLON) 525 comments = self._prev_comments or [] 526 527 this = self._parse_assignment() 528 if isinstance(this, exp.Expression): 529 # Moves the comment next to the alias in `alias: expr /* comment */` 530 comments += this.pop_comments() or [] 531 532 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 533 534 return super()._parse_expression() 535 536 def _parse_table( 537 self, 538 schema: bool = False, 539 joins: bool = False, 540 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 541 parse_bracket: bool = False, 542 is_db_reference: bool = False, 543 parse_partition: bool = False, 544 consume_pipe: bool = False, 545 ) -> t.Optional[exp.Expression]: 546 # DuckDB supports prefix aliases, e.g. FROM foo: bar 547 if self._next and self._next.token_type == TokenType.COLON: 548 alias = self._parse_table_alias( 549 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 550 ) 551 self._match(TokenType.COLON) 552 comments = self._prev_comments or [] 553 else: 554 alias = None 555 comments = [] 556 557 table = super()._parse_table( 558 schema=schema, 559 joins=joins, 560 alias_tokens=alias_tokens, 561 parse_bracket=parse_bracket, 562 is_db_reference=is_db_reference, 563 parse_partition=parse_partition, 564 ) 565 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 566 # Moves the comment next to the alias in `alias: table /* comment */` 567 comments += table.pop_comments() or [] 568 alias.comments = alias.pop_comments() + comments 569 table.set("alias", alias) 570 571 return table 572 573 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 574 # https://duckdb.org/docs/sql/samples.html 575 sample = super()._parse_table_sample(as_modifier=as_modifier) 576 if sample and not sample.args.get("method"): 577 if sample.args.get("size"): 578 sample.set("method", exp.var("RESERVOIR")) 579 else: 580 sample.set("method", exp.var("SYSTEM")) 581 582 return sample 583 584 def _parse_bracket( 585 self, this: t.Optional[exp.Expression] = None 586 ) -> t.Optional[exp.Expression]: 587 bracket = super()._parse_bracket(this) 588 589 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 590 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 591 bracket.set("returns_list_for_maps", True) 592 593 return bracket 594 595 def _parse_map(self) -> exp.ToMap | exp.Map: 596 if self._match(TokenType.L_BRACE, advance=False): 597 return self.expression(exp.ToMap, this=self._parse_bracket()) 598 599 args = self._parse_wrapped_csv(self._parse_assignment) 600 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 601 602 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 603 return self._parse_field_def() 604 605 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 606 if len(aggregations) == 1: 607 return super()._pivot_column_names(aggregations) 608 return pivot_column_names(aggregations, dialect="duckdb") 609 610 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 611 def _parse_attach_option() -> exp.AttachOption: 612 return self.expression( 613 exp.AttachOption, 614 this=self._parse_var(any_token=True), 615 expression=self._parse_field(any_token=True), 616 ) 617 618 self._match(TokenType.DATABASE) 619 exists = self._parse_exists(not_=is_attach) 620 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 621 622 if self._match(TokenType.L_PAREN, advance=False): 623 expressions = self._parse_wrapped_csv(_parse_attach_option) 624 else: 625 expressions = None 626 627 return ( 628 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 629 if is_attach 630 else self.expression(exp.Detach, this=this, exists=exists) 631 ) 632 633 def _parse_show_duckdb(self, this: str) -> exp.Show: 634 return self.expression(exp.Show, this=this) 635 636 def _parse_primary(self) -> t.Optional[exp.Expression]: 637 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 638 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 639 640 return super()._parse_primary() 641 642 class Generator(generator.Generator): 643 PARAMETER_TOKEN = "$" 644 NAMED_PLACEHOLDER_TOKEN = "$" 645 JOIN_HINTS = False 646 TABLE_HINTS = False 647 QUERY_HINTS = False 648 LIMIT_FETCH = "LIMIT" 649 STRUCT_DELIMITER = ("(", ")") 650 RENAME_TABLE_WITH_DB = False 651 NVL2_SUPPORTED = False 652 SEMI_ANTI_JOIN_WITH_SIDE = False 653 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 654 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 655 LAST_DAY_SUPPORTS_DATE_PART = False 656 JSON_KEY_VALUE_PAIR_SEP = "," 657 IGNORE_NULLS_IN_FUNC = True 658 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 659 SUPPORTS_CREATE_TABLE_LIKE = False 660 MULTI_ARG_DISTINCT = False 661 CAN_IMPLEMENT_ARRAY_ANY = True 662 SUPPORTS_TO_NUMBER = False 663 SUPPORTS_WINDOW_EXCLUDE = True 664 COPY_HAS_INTO_KEYWORD = False 665 STAR_EXCEPT = "EXCLUDE" 666 PAD_FILL_PATTERN_IS_REQUIRED = True 667 ARRAY_CONCAT_IS_VAR_LEN = False 668 ARRAY_SIZE_DIM_REQUIRED = False 669 NORMALIZE_EXTRACT_DATE_PARTS = True 670 SUPPORTS_LIKE_QUANTIFIERS = False 671 672 TRANSFORMS = { 673 **generator.Generator.TRANSFORMS, 674 exp.ApproxDistinct: approx_count_distinct_sql, 675 exp.Array: inline_array_unless_query, 676 exp.ArrayFilter: rename_func("LIST_FILTER"), 677 exp.ArrayRemove: remove_from_array_using_filter, 678 exp.ArraySort: _array_sort_sql, 679 exp.ArraySum: rename_func("LIST_SUM"), 680 exp.BitwiseXor: rename_func("XOR"), 681 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 682 exp.CurrentDate: lambda *_: "CURRENT_DATE", 683 exp.CurrentTime: lambda *_: "CURRENT_TIME", 684 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 685 exp.DayOfMonth: rename_func("DAYOFMONTH"), 686 exp.DayOfWeek: rename_func("DAYOFWEEK"), 687 exp.DayOfWeekIso: rename_func("ISODOW"), 688 exp.DayOfYear: rename_func("DAYOFYEAR"), 689 exp.DataType: _datatype_sql, 690 exp.Date: _date_sql, 691 exp.DateAdd: _date_delta_sql, 692 exp.DateFromParts: rename_func("MAKE_DATE"), 693 exp.DateSub: _date_delta_sql, 694 exp.DateDiff: _date_diff_sql, 695 exp.DateStrToDate: datestrtodate_sql, 696 exp.Datetime: no_datetime_sql, 697 exp.DatetimeSub: _date_delta_sql, 698 exp.DatetimeAdd: _date_delta_sql, 699 exp.DateToDi: lambda self, 700 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 701 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 702 exp.DiToDate: lambda self, 703 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 704 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 705 exp.GenerateDateArray: _generate_datetime_array_sql, 706 exp.GenerateTimestampArray: _generate_datetime_array_sql, 707 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 708 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 709 exp.Explode: rename_func("UNNEST"), 710 exp.IntDiv: lambda self, e: self.binary(e, "//"), 711 exp.IsInf: rename_func("ISINF"), 712 exp.IsNan: rename_func("ISNAN"), 713 exp.JSONBExists: rename_func("JSON_EXISTS"), 714 exp.JSONExtract: _arrow_json_extract_sql, 715 exp.JSONExtractArray: _json_extract_value_array_sql, 716 exp.JSONExtractScalar: _arrow_json_extract_sql, 717 exp.JSONFormat: _json_format_sql, 718 exp.JSONValueArray: _json_extract_value_array_sql, 719 exp.Lateral: explode_to_unnest_sql, 720 exp.LogicalOr: rename_func("BOOL_OR"), 721 exp.LogicalAnd: rename_func("BOOL_AND"), 722 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 723 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 724 exp.MonthsBetween: lambda self, e: self.func( 725 "DATEDIFF", 726 "'month'", 727 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 728 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 729 ), 730 exp.PercentileCont: rename_func("QUANTILE_CONT"), 731 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 732 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 733 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 734 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 735 exp.RegexpReplace: lambda self, e: self.func( 736 "REGEXP_REPLACE", 737 e.this, 738 e.expression, 739 e.args.get("replacement"), 740 e.args.get("modifiers"), 741 ), 742 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 743 exp.RegexpILike: lambda self, e: self.func( 744 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 745 ), 746 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 747 exp.Return: lambda self, e: self.sql(e, "this"), 748 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 749 exp.Rand: rename_func("RANDOM"), 750 exp.SHA: rename_func("SHA1"), 751 exp.SHA2: sha256_sql, 752 exp.Split: rename_func("STR_SPLIT"), 753 exp.SortArray: _sort_array_sql, 754 exp.StrPosition: strposition_sql, 755 exp.StrToUnix: lambda self, e: self.func( 756 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 757 ), 758 exp.Struct: _struct_sql, 759 exp.Transform: rename_func("LIST_TRANSFORM"), 760 exp.TimeAdd: _date_delta_sql, 761 exp.Time: no_time_sql, 762 exp.TimeDiff: _timediff_sql, 763 exp.Timestamp: no_timestamp_sql, 764 exp.TimestampDiff: lambda self, e: self.func( 765 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 766 ), 767 exp.TimestampTrunc: timestamptrunc_sql(), 768 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 769 exp.TimeStrToTime: timestrtotime_sql, 770 exp.TimeStrToUnix: lambda self, e: self.func( 771 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 772 ), 773 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 774 exp.TimeToUnix: rename_func("EPOCH"), 775 exp.TsOrDiToDi: lambda self, 776 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 777 exp.TsOrDsAdd: _date_delta_sql, 778 exp.TsOrDsDiff: lambda self, e: self.func( 779 "DATE_DIFF", 780 f"'{e.args.get('unit') or 'DAY'}'", 781 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 782 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 783 ), 784 exp.UnixToStr: lambda self, e: self.func( 785 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 786 ), 787 exp.DatetimeTrunc: lambda self, e: self.func( 788 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 789 ), 790 exp.UnixToTime: _unix_to_time_sql, 791 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 792 exp.VariancePop: rename_func("VAR_POP"), 793 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 794 exp.Xor: bool_xor_sql, 795 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 796 rename_func("LEVENSHTEIN") 797 ), 798 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 799 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 800 exp.DateBin: rename_func("TIME_BUCKET"), 801 } 802 803 SUPPORTED_JSON_PATH_PARTS = { 804 exp.JSONPathKey, 805 exp.JSONPathRoot, 806 exp.JSONPathSubscript, 807 exp.JSONPathWildcard, 808 } 809 810 TYPE_MAPPING = { 811 **generator.Generator.TYPE_MAPPING, 812 exp.DataType.Type.BINARY: "BLOB", 813 exp.DataType.Type.BPCHAR: "TEXT", 814 exp.DataType.Type.CHAR: "TEXT", 815 exp.DataType.Type.DATETIME: "TIMESTAMP", 816 exp.DataType.Type.FLOAT: "REAL", 817 exp.DataType.Type.JSONB: "JSON", 818 exp.DataType.Type.NCHAR: "TEXT", 819 exp.DataType.Type.NVARCHAR: "TEXT", 820 exp.DataType.Type.UINT: "UINTEGER", 821 exp.DataType.Type.VARBINARY: "BLOB", 822 exp.DataType.Type.ROWVERSION: "BLOB", 823 exp.DataType.Type.VARCHAR: "TEXT", 824 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 825 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 826 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 827 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 828 } 829 830 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 831 RESERVED_KEYWORDS = { 832 "array", 833 "analyse", 834 "union", 835 "all", 836 "when", 837 "in_p", 838 "default", 839 "create_p", 840 "window", 841 "asymmetric", 842 "to", 843 "else", 844 "localtime", 845 "from", 846 "end_p", 847 "select", 848 "current_date", 849 "foreign", 850 "with", 851 "grant", 852 "session_user", 853 "or", 854 "except", 855 "references", 856 "fetch", 857 "limit", 858 "group_p", 859 "leading", 860 "into", 861 "collate", 862 "offset", 863 "do", 864 "then", 865 "localtimestamp", 866 "check_p", 867 "lateral_p", 868 "current_role", 869 "where", 870 "asc_p", 871 "placing", 872 "desc_p", 873 "user", 874 "unique", 875 "initially", 876 "column", 877 "both", 878 "some", 879 "as", 880 "any", 881 "only", 882 "deferrable", 883 "null_p", 884 "current_time", 885 "true_p", 886 "table", 887 "case", 888 "trailing", 889 "variadic", 890 "for", 891 "on", 892 "distinct", 893 "false_p", 894 "not", 895 "constraint", 896 "current_timestamp", 897 "returning", 898 "primary", 899 "intersect", 900 "having", 901 "analyze", 902 "current_user", 903 "and", 904 "cast", 905 "symmetric", 906 "using", 907 "order", 908 "current_catalog", 909 } 910 911 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 912 913 # DuckDB doesn't generally support CREATE TABLE .. properties 914 # https://duckdb.org/docs/sql/statements/create_table.html 915 PROPERTIES_LOCATION = { 916 prop: exp.Properties.Location.UNSUPPORTED 917 for prop in generator.Generator.PROPERTIES_LOCATION 918 } 919 920 # There are a few exceptions (e.g. temporary tables) which are supported or 921 # can be transpiled to DuckDB, so we explicitly override them accordingly 922 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 923 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 924 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 925 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 926 927 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 928 exp.FirstValue, 929 exp.Lag, 930 exp.LastValue, 931 exp.Lead, 932 exp.NthValue, 933 ) 934 935 def lambda_sql( 936 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 937 ) -> str: 938 if expression.args.get("colon"): 939 prefix = "LAMBDA " 940 arrow_sep = ":" 941 wrap = False 942 else: 943 prefix = "" 944 945 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 946 return f"{prefix}{lambda_sql}" 947 948 def show_sql(self, expression: exp.Show) -> str: 949 return f"SHOW {expression.name}" 950 951 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 952 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 953 954 def strtotime_sql(self, expression: exp.StrToTime) -> str: 955 if expression.args.get("safe"): 956 formatted_time = self.format_time(expression) 957 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 958 return str_to_time_sql(self, expression) 959 960 def strtodate_sql(self, expression: exp.StrToDate) -> str: 961 if expression.args.get("safe"): 962 formatted_time = self.format_time(expression) 963 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 964 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 965 966 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 967 arg = expression.this 968 if expression.args.get("safe"): 969 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 970 return self.func("JSON", arg) 971 972 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 973 nano = expression.args.get("nano") 974 if nano is not None: 975 expression.set( 976 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 977 ) 978 979 return rename_func("MAKE_TIME")(self, expression) 980 981 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 982 sec = expression.args["sec"] 983 984 milli = expression.args.get("milli") 985 if milli is not None: 986 sec += milli.pop() / exp.Literal.number(1000.0) 987 988 nano = expression.args.get("nano") 989 if nano is not None: 990 sec += nano.pop() / exp.Literal.number(1000000000.0) 991 992 if milli or nano: 993 expression.set("sec", sec) 994 995 return rename_func("MAKE_TIMESTAMP")(self, expression) 996 997 def tablesample_sql( 998 self, 999 expression: exp.TableSample, 1000 tablesample_keyword: t.Optional[str] = None, 1001 ) -> str: 1002 if not isinstance(expression.parent, exp.Select): 1003 # This sample clause only applies to a single source, not the entire resulting relation 1004 tablesample_keyword = "TABLESAMPLE" 1005 1006 if expression.args.get("size"): 1007 method = expression.args.get("method") 1008 if method and method.name.upper() != "RESERVOIR": 1009 self.unsupported( 1010 f"Sampling method {method} is not supported with a discrete sample count, " 1011 "defaulting to reservoir sampling" 1012 ) 1013 expression.set("method", exp.var("RESERVOIR")) 1014 1015 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1016 1017 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1018 if isinstance(expression.parent, exp.UserDefinedFunction): 1019 return self.sql(expression, "this") 1020 return super().columndef_sql(expression, sep) 1021 1022 def join_sql(self, expression: exp.Join) -> str: 1023 if ( 1024 expression.side == "LEFT" 1025 and not expression.args.get("on") 1026 and isinstance(expression.this, exp.Unnest) 1027 ): 1028 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1029 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1030 return super().join_sql(expression.on(exp.true())) 1031 1032 return super().join_sql(expression) 1033 1034 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1035 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1036 if expression.args.get("is_end_exclusive"): 1037 return rename_func("RANGE")(self, expression) 1038 1039 return self.function_fallback_sql(expression) 1040 1041 def countif_sql(self, expression: exp.CountIf) -> str: 1042 if self.dialect.version >= Version("1.2"): 1043 return self.function_fallback_sql(expression) 1044 1045 # https://github.com/tobymao/sqlglot/pull/4749 1046 return count_if_to_sum(self, expression) 1047 1048 def bracket_sql(self, expression: exp.Bracket) -> str: 1049 if self.dialect.version >= Version("1.2"): 1050 return super().bracket_sql(expression) 1051 1052 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1053 this = expression.this 1054 if isinstance(this, exp.Array): 1055 this.replace(exp.paren(this)) 1056 1057 bracket = super().bracket_sql(expression) 1058 1059 if not expression.args.get("returns_list_for_maps"): 1060 if not this.type: 1061 from sqlglot.optimizer.annotate_types import annotate_types 1062 1063 this = annotate_types(this, dialect=self.dialect) 1064 1065 if this.is_type(exp.DataType.Type.MAP): 1066 bracket = f"({bracket})[1]" 1067 1068 return bracket 1069 1070 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1071 expression_sql = self.sql(expression, "expression") 1072 1073 func = expression.this 1074 if isinstance(func, exp.PERCENTILES): 1075 # Make the order key the first arg and slide the fraction to the right 1076 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1077 order_col = expression.find(exp.Ordered) 1078 if order_col: 1079 func.set("expression", func.this) 1080 func.set("this", order_col.this) 1081 1082 this = self.sql(expression, "this").rstrip(")") 1083 1084 return f"{this}{expression_sql})" 1085 1086 def length_sql(self, expression: exp.Length) -> str: 1087 arg = expression.this 1088 1089 # Dialects like BQ and Snowflake also accept binary values as args, so 1090 # DDB will attempt to infer the type or resort to case/when resolution 1091 if not expression.args.get("binary") or arg.is_string: 1092 return self.func("LENGTH", arg) 1093 1094 if not arg.type: 1095 from sqlglot.optimizer.annotate_types import annotate_types 1096 1097 arg = annotate_types(arg, dialect=self.dialect) 1098 1099 if arg.is_type(*exp.DataType.TEXT_TYPES): 1100 return self.func("LENGTH", arg) 1101 1102 # We need these casts to make duckdb's static type checker happy 1103 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1104 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1105 1106 case = ( 1107 exp.case(self.func("TYPEOF", arg)) 1108 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1109 .else_( 1110 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1111 ) # anonymous to break length_sql recursion 1112 ) 1113 1114 return self.sql(case) 1115 1116 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1117 this = expression.this 1118 key = expression.args.get("key") 1119 key_sql = key.name if isinstance(key, exp.Expression) else "" 1120 value_sql = self.sql(expression, "value") 1121 1122 kv_sql = f"{key_sql} := {value_sql}" 1123 1124 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1125 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1126 if isinstance(this, exp.Struct) and not this.expressions: 1127 return self.func("STRUCT_PACK", kv_sql) 1128 1129 return self.func("STRUCT_INSERT", this, kv_sql) 1130 1131 def unnest_sql(self, expression: exp.Unnest) -> str: 1132 explode_array = expression.args.get("explode_array") 1133 if explode_array: 1134 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1135 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1136 expression.expressions.append( 1137 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1138 ) 1139 1140 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1141 alias = expression.args.get("alias") 1142 if isinstance(alias, exp.TableAlias): 1143 expression.set("alias", None) 1144 if alias.columns: 1145 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1146 1147 unnest_sql = super().unnest_sql(expression) 1148 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1149 return self.sql(select) 1150 1151 return super().unnest_sql(expression) 1152 1153 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1154 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1155 # DuckDB should render IGNORE NULLS only for the general-purpose 1156 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1157 return super().ignorenulls_sql(expression) 1158 1159 if not isinstance(expression.this, exp.AnyValue): 1160 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1161 1162 return self.sql(expression, "this") 1163 1164 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1165 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1166 # DuckDB should render RESPECT NULLS only for the general-purpose 1167 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1168 return super().respectnulls_sql(expression) 1169 1170 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1171 return self.sql(expression, "this") 1172 1173 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1174 this = self.sql(expression, "this") 1175 null_text = self.sql(expression, "null") 1176 1177 if null_text: 1178 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1179 1180 return self.func("ARRAY_TO_STRING", this, expression.expression) 1181 1182 @unsupported_args("position", "occurrence") 1183 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1184 group = expression.args.get("group") 1185 params = expression.args.get("parameters") 1186 1187 # Do not render group if there is no following argument, 1188 # and it's the default value for this dialect 1189 if ( 1190 not params 1191 and group 1192 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1193 ): 1194 group = None 1195 return self.func( 1196 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1197 ) 1198 1199 @unsupported_args("culture") 1200 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1201 fmt = expression.args.get("format") 1202 if fmt and fmt.is_int: 1203 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1204 1205 self.unsupported("Only integer formats are supported by NumberToStr") 1206 return self.function_fallback_sql(expression) 1207 1208 def autoincrementcolumnconstraint_sql(self, _) -> str: 1209 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1210 return "" 1211 1212 def aliases_sql(self, expression: exp.Aliases) -> str: 1213 this = expression.this 1214 if isinstance(this, exp.Posexplode): 1215 return self.posexplode_sql(this) 1216 1217 return super().aliases_sql(expression) 1218 1219 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1220 this = expression.this 1221 parent = expression.parent 1222 1223 # The default Spark aliases are "pos" and "col", unless specified otherwise 1224 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1225 1226 if isinstance(parent, exp.Aliases): 1227 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1228 pos, col = parent.expressions 1229 elif isinstance(parent, exp.Table): 1230 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1231 alias = parent.args.get("alias") 1232 if alias: 1233 pos, col = alias.columns or [pos, col] 1234 alias.pop() 1235 1236 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1237 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1238 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1239 gen_subscripts = self.sql( 1240 exp.Alias( 1241 this=exp.Anonymous( 1242 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1243 ) 1244 - exp.Literal.number(1), 1245 alias=pos, 1246 ) 1247 ) 1248 1249 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1250 1251 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1252 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1253 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1254 1255 return posexplode_sql 1256 1257 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1258 this = expression.this 1259 1260 if not this.type: 1261 from sqlglot.optimizer.annotate_types import annotate_types 1262 1263 this = annotate_types(this, dialect=self.dialect) 1264 1265 if this.is_type(*exp.DataType.TEXT_TYPES): 1266 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1267 1268 func = self.func( 1269 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1270 ) 1271 1272 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1273 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1274 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1275 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1276 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1277 return self.sql(exp.Cast(this=func, to=this.type)) 1278 1279 return self.sql(func)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns) as in DuckDB, Spark3/Databricks
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts e.g. in DuckDB. In dialects which don't support fixed size arrays such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Whether number literals can include underscores for better readability
Specifies the strategy according to which identifiers should be normalized.
308 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 309 if isinstance(path, exp.Literal): 310 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 311 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 312 # This check ensures we'll avoid trying to parse these as JSON paths, which can 313 # either result in a noisy warning or in an invalid representation of the path. 314 path_text = path.name 315 if path_text.startswith("/") or "[#" in path_text: 316 return path 317 318 return super().to_json_path(path)
320 class Tokenizer(tokens.Tokenizer): 321 BYTE_STRINGS = [("e'", "'"), ("E'", "'")] 322 HEREDOC_STRINGS = ["$"] 323 324 HEREDOC_TAG_IS_IDENTIFIER = True 325 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 326 327 KEYWORDS = { 328 **tokens.Tokenizer.KEYWORDS, 329 "//": TokenType.DIV, 330 "**": TokenType.DSTAR, 331 "^@": TokenType.CARET_AT, 332 "@>": TokenType.AT_GT, 333 "<@": TokenType.LT_AT, 334 "ATTACH": TokenType.ATTACH, 335 "BINARY": TokenType.VARBINARY, 336 "BITSTRING": TokenType.BIT, 337 "BPCHAR": TokenType.TEXT, 338 "CHAR": TokenType.TEXT, 339 "DATETIME": TokenType.TIMESTAMPNTZ, 340 "DETACH": TokenType.DETACH, 341 "EXCLUDE": TokenType.EXCEPT, 342 "LOGICAL": TokenType.BOOLEAN, 343 "ONLY": TokenType.ONLY, 344 "PIVOT_WIDER": TokenType.PIVOT, 345 "POSITIONAL": TokenType.POSITIONAL, 346 "RESET": TokenType.COMMAND, 347 "SIGNED": TokenType.INT, 348 "STRING": TokenType.TEXT, 349 "SUMMARIZE": TokenType.SUMMARIZE, 350 "TIMESTAMP": TokenType.TIMESTAMPNTZ, 351 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 352 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 353 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 354 "TIMESTAMP_US": TokenType.TIMESTAMP, 355 "UBIGINT": TokenType.UBIGINT, 356 "UINTEGER": TokenType.UINT, 357 "USMALLINT": TokenType.USMALLINT, 358 "UTINYINT": TokenType.UTINYINT, 359 "VARCHAR": TokenType.TEXT, 360 } 361 KEYWORDS.pop("/*+") 362 363 SINGLE_TOKENS = { 364 **tokens.Tokenizer.SINGLE_TOKENS, 365 "$": TokenType.PARAMETER, 366 } 367 368 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
370 class Parser(parser.Parser): 371 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = True 372 373 BITWISE = { 374 **parser.Parser.BITWISE, 375 TokenType.TILDA: exp.RegexpLike, 376 } 377 BITWISE.pop(TokenType.CARET) 378 379 RANGE_PARSERS = { 380 **parser.Parser.RANGE_PARSERS, 381 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 382 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 383 } 384 385 EXPONENT = { 386 **parser.Parser.EXPONENT, 387 TokenType.CARET: exp.Pow, 388 TokenType.DSTAR: exp.Pow, 389 } 390 391 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 392 393 SHOW_PARSERS = { 394 "TABLES": _show_parser("TABLES"), 395 "ALL TABLES": _show_parser("ALL TABLES"), 396 } 397 398 FUNCTIONS = { 399 **parser.Parser.FUNCTIONS, 400 "ANY_VALUE": lambda args: exp.IgnoreNulls(this=exp.AnyValue.from_arg_list(args)), 401 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 402 "ARRAY_SORT": exp.SortArray.from_arg_list, 403 "DATEDIFF": _build_date_diff, 404 "DATE_DIFF": _build_date_diff, 405 "DATE_TRUNC": date_trunc_to_time, 406 "DATETRUNC": date_trunc_to_time, 407 "DECODE": lambda args: exp.Decode( 408 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 409 ), 410 "EDITDIST3": exp.Levenshtein.from_arg_list, 411 "ENCODE": lambda args: exp.Encode( 412 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 413 ), 414 "EPOCH": exp.TimeToUnix.from_arg_list, 415 "EPOCH_MS": lambda args: exp.UnixToTime( 416 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 417 ), 418 "GENERATE_SERIES": _build_generate_series(), 419 "JSON": exp.ParseJSON.from_arg_list, 420 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 421 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 422 "LIST_CONTAINS": exp.ArrayContains.from_arg_list, 423 "LIST_HAS": exp.ArrayContains.from_arg_list, 424 "LIST_HAS_ANY": exp.ArrayOverlaps.from_arg_list, 425 "LIST_REVERSE_SORT": _build_sort_array_desc, 426 "LIST_SORT": exp.SortArray.from_arg_list, 427 "LIST_VALUE": lambda args: exp.Array(expressions=args), 428 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 429 "MAKE_TIMESTAMP": _build_make_timestamp, 430 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 431 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 432 "RANGE": _build_generate_series(end_exclusive=True), 433 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 434 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 435 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 436 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 437 this=seq_get(args, 0), 438 expression=seq_get(args, 1), 439 replacement=seq_get(args, 2), 440 modifiers=seq_get(args, 3), 441 ), 442 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 443 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 444 "STRING_SPLIT": exp.Split.from_arg_list, 445 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 446 "STRING_TO_ARRAY": exp.Split.from_arg_list, 447 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 448 "STRUCT_PACK": exp.Struct.from_arg_list, 449 "STR_SPLIT": exp.Split.from_arg_list, 450 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 451 "TIME_BUCKET": exp.DateBin.from_arg_list, 452 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 453 "UNNEST": exp.Explode.from_arg_list, 454 "XOR": binary_from_function(exp.BitwiseXor), 455 } 456 457 FUNCTIONS.pop("DATE_SUB") 458 FUNCTIONS.pop("GLOB") 459 460 FUNCTION_PARSERS = { 461 **parser.Parser.FUNCTION_PARSERS, 462 **dict.fromkeys( 463 ("GROUP_CONCAT", "LISTAGG", "STRINGAGG"), lambda self: self._parse_string_agg() 464 ), 465 } 466 FUNCTION_PARSERS.pop("DECODE") 467 468 NO_PAREN_FUNCTION_PARSERS = { 469 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 470 "MAP": lambda self: self._parse_map(), 471 "@": lambda self: exp.Abs(this=self._parse_bitwise()), 472 } 473 474 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 475 TokenType.SEMI, 476 TokenType.ANTI, 477 } 478 479 PLACEHOLDER_PARSERS = { 480 **parser.Parser.PLACEHOLDER_PARSERS, 481 TokenType.PARAMETER: lambda self: ( 482 self.expression(exp.Placeholder, this=self._prev.text) 483 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 484 else None 485 ), 486 } 487 488 TYPE_CONVERTERS = { 489 # https://duckdb.org/docs/sql/data_types/numeric 490 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 491 # https://duckdb.org/docs/sql/data_types/text 492 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 493 } 494 495 STATEMENT_PARSERS = { 496 **parser.Parser.STATEMENT_PARSERS, 497 TokenType.ATTACH: lambda self: self._parse_attach_detach(), 498 TokenType.DETACH: lambda self: self._parse_attach_detach(is_attach=False), 499 TokenType.SHOW: lambda self: self._parse_show(), 500 } 501 502 SET_PARSERS = { 503 **parser.Parser.SET_PARSERS, 504 "VARIABLE": lambda self: self._parse_set_item_assignment("VARIABLE"), 505 } 506 507 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 508 index = self._index 509 if not self._match_text_seq("LAMBDA"): 510 return super()._parse_lambda(alias=alias) 511 512 expressions = self._parse_csv(self._parse_lambda_arg) 513 if not self._match(TokenType.COLON): 514 self._retreat(index) 515 return None 516 517 this = self._replace_lambda(self._parse_assignment(), expressions) 518 return self.expression(exp.Lambda, this=this, expressions=expressions, colon=True) 519 520 def _parse_expression(self) -> t.Optional[exp.Expression]: 521 # DuckDB supports prefix aliases, e.g. foo: 1 522 if self._next and self._next.token_type == TokenType.COLON: 523 alias = self._parse_id_var(tokens=self.ALIAS_TOKENS) 524 self._match(TokenType.COLON) 525 comments = self._prev_comments or [] 526 527 this = self._parse_assignment() 528 if isinstance(this, exp.Expression): 529 # Moves the comment next to the alias in `alias: expr /* comment */` 530 comments += this.pop_comments() or [] 531 532 return self.expression(exp.Alias, comments=comments, this=this, alias=alias) 533 534 return super()._parse_expression() 535 536 def _parse_table( 537 self, 538 schema: bool = False, 539 joins: bool = False, 540 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 541 parse_bracket: bool = False, 542 is_db_reference: bool = False, 543 parse_partition: bool = False, 544 consume_pipe: bool = False, 545 ) -> t.Optional[exp.Expression]: 546 # DuckDB supports prefix aliases, e.g. FROM foo: bar 547 if self._next and self._next.token_type == TokenType.COLON: 548 alias = self._parse_table_alias( 549 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 550 ) 551 self._match(TokenType.COLON) 552 comments = self._prev_comments or [] 553 else: 554 alias = None 555 comments = [] 556 557 table = super()._parse_table( 558 schema=schema, 559 joins=joins, 560 alias_tokens=alias_tokens, 561 parse_bracket=parse_bracket, 562 is_db_reference=is_db_reference, 563 parse_partition=parse_partition, 564 ) 565 if isinstance(table, exp.Expression) and isinstance(alias, exp.TableAlias): 566 # Moves the comment next to the alias in `alias: table /* comment */` 567 comments += table.pop_comments() or [] 568 alias.comments = alias.pop_comments() + comments 569 table.set("alias", alias) 570 571 return table 572 573 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 574 # https://duckdb.org/docs/sql/samples.html 575 sample = super()._parse_table_sample(as_modifier=as_modifier) 576 if sample and not sample.args.get("method"): 577 if sample.args.get("size"): 578 sample.set("method", exp.var("RESERVOIR")) 579 else: 580 sample.set("method", exp.var("SYSTEM")) 581 582 return sample 583 584 def _parse_bracket( 585 self, this: t.Optional[exp.Expression] = None 586 ) -> t.Optional[exp.Expression]: 587 bracket = super()._parse_bracket(this) 588 589 if self.dialect.version < Version("1.2.0") and isinstance(bracket, exp.Bracket): 590 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 591 bracket.set("returns_list_for_maps", True) 592 593 return bracket 594 595 def _parse_map(self) -> exp.ToMap | exp.Map: 596 if self._match(TokenType.L_BRACE, advance=False): 597 return self.expression(exp.ToMap, this=self._parse_bracket()) 598 599 args = self._parse_wrapped_csv(self._parse_assignment) 600 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 601 602 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 603 return self._parse_field_def() 604 605 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 606 if len(aggregations) == 1: 607 return super()._pivot_column_names(aggregations) 608 return pivot_column_names(aggregations, dialect="duckdb") 609 610 def _parse_attach_detach(self, is_attach=True) -> exp.Attach | exp.Detach: 611 def _parse_attach_option() -> exp.AttachOption: 612 return self.expression( 613 exp.AttachOption, 614 this=self._parse_var(any_token=True), 615 expression=self._parse_field(any_token=True), 616 ) 617 618 self._match(TokenType.DATABASE) 619 exists = self._parse_exists(not_=is_attach) 620 this = self._parse_alias(self._parse_primary_or_var(), explicit=True) 621 622 if self._match(TokenType.L_PAREN, advance=False): 623 expressions = self._parse_wrapped_csv(_parse_attach_option) 624 else: 625 expressions = None 626 627 return ( 628 self.expression(exp.Attach, this=this, exists=exists, expressions=expressions) 629 if is_attach 630 else self.expression(exp.Detach, this=this, exists=exists) 631 ) 632 633 def _parse_show_duckdb(self, this: str) -> exp.Show: 634 return self.expression(exp.Show, this=this) 635 636 def _parse_primary(self) -> t.Optional[exp.Expression]: 637 if self._match_pair(TokenType.HASH, TokenType.NUMBER): 638 return exp.PositionalColumn(this=exp.Literal.number(self._prev.text)) 639 640 return super()._parse_primary()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
642 class Generator(generator.Generator): 643 PARAMETER_TOKEN = "$" 644 NAMED_PLACEHOLDER_TOKEN = "$" 645 JOIN_HINTS = False 646 TABLE_HINTS = False 647 QUERY_HINTS = False 648 LIMIT_FETCH = "LIMIT" 649 STRUCT_DELIMITER = ("(", ")") 650 RENAME_TABLE_WITH_DB = False 651 NVL2_SUPPORTED = False 652 SEMI_ANTI_JOIN_WITH_SIDE = False 653 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 654 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 655 LAST_DAY_SUPPORTS_DATE_PART = False 656 JSON_KEY_VALUE_PAIR_SEP = "," 657 IGNORE_NULLS_IN_FUNC = True 658 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 659 SUPPORTS_CREATE_TABLE_LIKE = False 660 MULTI_ARG_DISTINCT = False 661 CAN_IMPLEMENT_ARRAY_ANY = True 662 SUPPORTS_TO_NUMBER = False 663 SUPPORTS_WINDOW_EXCLUDE = True 664 COPY_HAS_INTO_KEYWORD = False 665 STAR_EXCEPT = "EXCLUDE" 666 PAD_FILL_PATTERN_IS_REQUIRED = True 667 ARRAY_CONCAT_IS_VAR_LEN = False 668 ARRAY_SIZE_DIM_REQUIRED = False 669 NORMALIZE_EXTRACT_DATE_PARTS = True 670 SUPPORTS_LIKE_QUANTIFIERS = False 671 672 TRANSFORMS = { 673 **generator.Generator.TRANSFORMS, 674 exp.ApproxDistinct: approx_count_distinct_sql, 675 exp.Array: inline_array_unless_query, 676 exp.ArrayFilter: rename_func("LIST_FILTER"), 677 exp.ArrayRemove: remove_from_array_using_filter, 678 exp.ArraySort: _array_sort_sql, 679 exp.ArraySum: rename_func("LIST_SUM"), 680 exp.BitwiseXor: rename_func("XOR"), 681 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 682 exp.CurrentDate: lambda *_: "CURRENT_DATE", 683 exp.CurrentTime: lambda *_: "CURRENT_TIME", 684 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 685 exp.DayOfMonth: rename_func("DAYOFMONTH"), 686 exp.DayOfWeek: rename_func("DAYOFWEEK"), 687 exp.DayOfWeekIso: rename_func("ISODOW"), 688 exp.DayOfYear: rename_func("DAYOFYEAR"), 689 exp.DataType: _datatype_sql, 690 exp.Date: _date_sql, 691 exp.DateAdd: _date_delta_sql, 692 exp.DateFromParts: rename_func("MAKE_DATE"), 693 exp.DateSub: _date_delta_sql, 694 exp.DateDiff: _date_diff_sql, 695 exp.DateStrToDate: datestrtodate_sql, 696 exp.Datetime: no_datetime_sql, 697 exp.DatetimeSub: _date_delta_sql, 698 exp.DatetimeAdd: _date_delta_sql, 699 exp.DateToDi: lambda self, 700 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 701 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 702 exp.DiToDate: lambda self, 703 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 704 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 705 exp.GenerateDateArray: _generate_datetime_array_sql, 706 exp.GenerateTimestampArray: _generate_datetime_array_sql, 707 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, within_group=False), 708 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 709 exp.Explode: rename_func("UNNEST"), 710 exp.IntDiv: lambda self, e: self.binary(e, "//"), 711 exp.IsInf: rename_func("ISINF"), 712 exp.IsNan: rename_func("ISNAN"), 713 exp.JSONBExists: rename_func("JSON_EXISTS"), 714 exp.JSONExtract: _arrow_json_extract_sql, 715 exp.JSONExtractArray: _json_extract_value_array_sql, 716 exp.JSONExtractScalar: _arrow_json_extract_sql, 717 exp.JSONFormat: _json_format_sql, 718 exp.JSONValueArray: _json_extract_value_array_sql, 719 exp.Lateral: explode_to_unnest_sql, 720 exp.LogicalOr: rename_func("BOOL_OR"), 721 exp.LogicalAnd: rename_func("BOOL_AND"), 722 exp.MakeInterval: lambda self, e: no_make_interval_sql(self, e, sep=" "), 723 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 724 exp.MonthsBetween: lambda self, e: self.func( 725 "DATEDIFF", 726 "'month'", 727 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 728 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 729 ), 730 exp.PercentileCont: rename_func("QUANTILE_CONT"), 731 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 732 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 733 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 734 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 735 exp.RegexpReplace: lambda self, e: self.func( 736 "REGEXP_REPLACE", 737 e.this, 738 e.expression, 739 e.args.get("replacement"), 740 e.args.get("modifiers"), 741 ), 742 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 743 exp.RegexpILike: lambda self, e: self.func( 744 "REGEXP_MATCHES", e.this, e.expression, exp.Literal.string("i") 745 ), 746 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 747 exp.Return: lambda self, e: self.sql(e, "this"), 748 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 749 exp.Rand: rename_func("RANDOM"), 750 exp.SHA: rename_func("SHA1"), 751 exp.SHA2: sha256_sql, 752 exp.Split: rename_func("STR_SPLIT"), 753 exp.SortArray: _sort_array_sql, 754 exp.StrPosition: strposition_sql, 755 exp.StrToUnix: lambda self, e: self.func( 756 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 757 ), 758 exp.Struct: _struct_sql, 759 exp.Transform: rename_func("LIST_TRANSFORM"), 760 exp.TimeAdd: _date_delta_sql, 761 exp.Time: no_time_sql, 762 exp.TimeDiff: _timediff_sql, 763 exp.Timestamp: no_timestamp_sql, 764 exp.TimestampDiff: lambda self, e: self.func( 765 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 766 ), 767 exp.TimestampTrunc: timestamptrunc_sql(), 768 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 769 exp.TimeStrToTime: timestrtotime_sql, 770 exp.TimeStrToUnix: lambda self, e: self.func( 771 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 772 ), 773 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 774 exp.TimeToUnix: rename_func("EPOCH"), 775 exp.TsOrDiToDi: lambda self, 776 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 777 exp.TsOrDsAdd: _date_delta_sql, 778 exp.TsOrDsDiff: lambda self, e: self.func( 779 "DATE_DIFF", 780 f"'{e.args.get('unit') or 'DAY'}'", 781 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 782 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 783 ), 784 exp.UnixToStr: lambda self, e: self.func( 785 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 786 ), 787 exp.DatetimeTrunc: lambda self, e: self.func( 788 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 789 ), 790 exp.UnixToTime: _unix_to_time_sql, 791 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 792 exp.VariancePop: rename_func("VAR_POP"), 793 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 794 exp.Xor: bool_xor_sql, 795 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 796 rename_func("LEVENSHTEIN") 797 ), 798 exp.JSONObjectAgg: rename_func("JSON_GROUP_OBJECT"), 799 exp.JSONBObjectAgg: rename_func("JSON_GROUP_OBJECT"), 800 exp.DateBin: rename_func("TIME_BUCKET"), 801 } 802 803 SUPPORTED_JSON_PATH_PARTS = { 804 exp.JSONPathKey, 805 exp.JSONPathRoot, 806 exp.JSONPathSubscript, 807 exp.JSONPathWildcard, 808 } 809 810 TYPE_MAPPING = { 811 **generator.Generator.TYPE_MAPPING, 812 exp.DataType.Type.BINARY: "BLOB", 813 exp.DataType.Type.BPCHAR: "TEXT", 814 exp.DataType.Type.CHAR: "TEXT", 815 exp.DataType.Type.DATETIME: "TIMESTAMP", 816 exp.DataType.Type.FLOAT: "REAL", 817 exp.DataType.Type.JSONB: "JSON", 818 exp.DataType.Type.NCHAR: "TEXT", 819 exp.DataType.Type.NVARCHAR: "TEXT", 820 exp.DataType.Type.UINT: "UINTEGER", 821 exp.DataType.Type.VARBINARY: "BLOB", 822 exp.DataType.Type.ROWVERSION: "BLOB", 823 exp.DataType.Type.VARCHAR: "TEXT", 824 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 825 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 826 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 827 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 828 } 829 830 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 831 RESERVED_KEYWORDS = { 832 "array", 833 "analyse", 834 "union", 835 "all", 836 "when", 837 "in_p", 838 "default", 839 "create_p", 840 "window", 841 "asymmetric", 842 "to", 843 "else", 844 "localtime", 845 "from", 846 "end_p", 847 "select", 848 "current_date", 849 "foreign", 850 "with", 851 "grant", 852 "session_user", 853 "or", 854 "except", 855 "references", 856 "fetch", 857 "limit", 858 "group_p", 859 "leading", 860 "into", 861 "collate", 862 "offset", 863 "do", 864 "then", 865 "localtimestamp", 866 "check_p", 867 "lateral_p", 868 "current_role", 869 "where", 870 "asc_p", 871 "placing", 872 "desc_p", 873 "user", 874 "unique", 875 "initially", 876 "column", 877 "both", 878 "some", 879 "as", 880 "any", 881 "only", 882 "deferrable", 883 "null_p", 884 "current_time", 885 "true_p", 886 "table", 887 "case", 888 "trailing", 889 "variadic", 890 "for", 891 "on", 892 "distinct", 893 "false_p", 894 "not", 895 "constraint", 896 "current_timestamp", 897 "returning", 898 "primary", 899 "intersect", 900 "having", 901 "analyze", 902 "current_user", 903 "and", 904 "cast", 905 "symmetric", 906 "using", 907 "order", 908 "current_catalog", 909 } 910 911 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 912 913 # DuckDB doesn't generally support CREATE TABLE .. properties 914 # https://duckdb.org/docs/sql/statements/create_table.html 915 PROPERTIES_LOCATION = { 916 prop: exp.Properties.Location.UNSUPPORTED 917 for prop in generator.Generator.PROPERTIES_LOCATION 918 } 919 920 # There are a few exceptions (e.g. temporary tables) which are supported or 921 # can be transpiled to DuckDB, so we explicitly override them accordingly 922 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 923 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 924 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 925 PROPERTIES_LOCATION[exp.SequenceProperties] = exp.Properties.Location.POST_EXPRESSION 926 927 IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS = ( 928 exp.FirstValue, 929 exp.Lag, 930 exp.LastValue, 931 exp.Lead, 932 exp.NthValue, 933 ) 934 935 def lambda_sql( 936 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 937 ) -> str: 938 if expression.args.get("colon"): 939 prefix = "LAMBDA " 940 arrow_sep = ":" 941 wrap = False 942 else: 943 prefix = "" 944 945 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 946 return f"{prefix}{lambda_sql}" 947 948 def show_sql(self, expression: exp.Show) -> str: 949 return f"SHOW {expression.name}" 950 951 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 952 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 953 954 def strtotime_sql(self, expression: exp.StrToTime) -> str: 955 if expression.args.get("safe"): 956 formatted_time = self.format_time(expression) 957 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 958 return str_to_time_sql(self, expression) 959 960 def strtodate_sql(self, expression: exp.StrToDate) -> str: 961 if expression.args.get("safe"): 962 formatted_time = self.format_time(expression) 963 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 964 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 965 966 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 967 arg = expression.this 968 if expression.args.get("safe"): 969 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 970 return self.func("JSON", arg) 971 972 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 973 nano = expression.args.get("nano") 974 if nano is not None: 975 expression.set( 976 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 977 ) 978 979 return rename_func("MAKE_TIME")(self, expression) 980 981 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 982 sec = expression.args["sec"] 983 984 milli = expression.args.get("milli") 985 if milli is not None: 986 sec += milli.pop() / exp.Literal.number(1000.0) 987 988 nano = expression.args.get("nano") 989 if nano is not None: 990 sec += nano.pop() / exp.Literal.number(1000000000.0) 991 992 if milli or nano: 993 expression.set("sec", sec) 994 995 return rename_func("MAKE_TIMESTAMP")(self, expression) 996 997 def tablesample_sql( 998 self, 999 expression: exp.TableSample, 1000 tablesample_keyword: t.Optional[str] = None, 1001 ) -> str: 1002 if not isinstance(expression.parent, exp.Select): 1003 # This sample clause only applies to a single source, not the entire resulting relation 1004 tablesample_keyword = "TABLESAMPLE" 1005 1006 if expression.args.get("size"): 1007 method = expression.args.get("method") 1008 if method and method.name.upper() != "RESERVOIR": 1009 self.unsupported( 1010 f"Sampling method {method} is not supported with a discrete sample count, " 1011 "defaulting to reservoir sampling" 1012 ) 1013 expression.set("method", exp.var("RESERVOIR")) 1014 1015 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 1016 1017 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 1018 if isinstance(expression.parent, exp.UserDefinedFunction): 1019 return self.sql(expression, "this") 1020 return super().columndef_sql(expression, sep) 1021 1022 def join_sql(self, expression: exp.Join) -> str: 1023 if ( 1024 expression.side == "LEFT" 1025 and not expression.args.get("on") 1026 and isinstance(expression.this, exp.Unnest) 1027 ): 1028 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1029 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1030 return super().join_sql(expression.on(exp.true())) 1031 1032 return super().join_sql(expression) 1033 1034 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 1035 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 1036 if expression.args.get("is_end_exclusive"): 1037 return rename_func("RANGE")(self, expression) 1038 1039 return self.function_fallback_sql(expression) 1040 1041 def countif_sql(self, expression: exp.CountIf) -> str: 1042 if self.dialect.version >= Version("1.2"): 1043 return self.function_fallback_sql(expression) 1044 1045 # https://github.com/tobymao/sqlglot/pull/4749 1046 return count_if_to_sum(self, expression) 1047 1048 def bracket_sql(self, expression: exp.Bracket) -> str: 1049 if self.dialect.version >= Version("1.2"): 1050 return super().bracket_sql(expression) 1051 1052 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1053 this = expression.this 1054 if isinstance(this, exp.Array): 1055 this.replace(exp.paren(this)) 1056 1057 bracket = super().bracket_sql(expression) 1058 1059 if not expression.args.get("returns_list_for_maps"): 1060 if not this.type: 1061 from sqlglot.optimizer.annotate_types import annotate_types 1062 1063 this = annotate_types(this, dialect=self.dialect) 1064 1065 if this.is_type(exp.DataType.Type.MAP): 1066 bracket = f"({bracket})[1]" 1067 1068 return bracket 1069 1070 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1071 expression_sql = self.sql(expression, "expression") 1072 1073 func = expression.this 1074 if isinstance(func, exp.PERCENTILES): 1075 # Make the order key the first arg and slide the fraction to the right 1076 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1077 order_col = expression.find(exp.Ordered) 1078 if order_col: 1079 func.set("expression", func.this) 1080 func.set("this", order_col.this) 1081 1082 this = self.sql(expression, "this").rstrip(")") 1083 1084 return f"{this}{expression_sql})" 1085 1086 def length_sql(self, expression: exp.Length) -> str: 1087 arg = expression.this 1088 1089 # Dialects like BQ and Snowflake also accept binary values as args, so 1090 # DDB will attempt to infer the type or resort to case/when resolution 1091 if not expression.args.get("binary") or arg.is_string: 1092 return self.func("LENGTH", arg) 1093 1094 if not arg.type: 1095 from sqlglot.optimizer.annotate_types import annotate_types 1096 1097 arg = annotate_types(arg, dialect=self.dialect) 1098 1099 if arg.is_type(*exp.DataType.TEXT_TYPES): 1100 return self.func("LENGTH", arg) 1101 1102 # We need these casts to make duckdb's static type checker happy 1103 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1104 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1105 1106 case = ( 1107 exp.case(self.func("TYPEOF", arg)) 1108 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1109 .else_( 1110 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1111 ) # anonymous to break length_sql recursion 1112 ) 1113 1114 return self.sql(case) 1115 1116 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1117 this = expression.this 1118 key = expression.args.get("key") 1119 key_sql = key.name if isinstance(key, exp.Expression) else "" 1120 value_sql = self.sql(expression, "value") 1121 1122 kv_sql = f"{key_sql} := {value_sql}" 1123 1124 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1125 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1126 if isinstance(this, exp.Struct) and not this.expressions: 1127 return self.func("STRUCT_PACK", kv_sql) 1128 1129 return self.func("STRUCT_INSERT", this, kv_sql) 1130 1131 def unnest_sql(self, expression: exp.Unnest) -> str: 1132 explode_array = expression.args.get("explode_array") 1133 if explode_array: 1134 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1135 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1136 expression.expressions.append( 1137 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1138 ) 1139 1140 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1141 alias = expression.args.get("alias") 1142 if isinstance(alias, exp.TableAlias): 1143 expression.set("alias", None) 1144 if alias.columns: 1145 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1146 1147 unnest_sql = super().unnest_sql(expression) 1148 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1149 return self.sql(select) 1150 1151 return super().unnest_sql(expression) 1152 1153 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1154 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1155 # DuckDB should render IGNORE NULLS only for the general-purpose 1156 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1157 return super().ignorenulls_sql(expression) 1158 1159 if not isinstance(expression.this, exp.AnyValue): 1160 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1161 1162 return self.sql(expression, "this") 1163 1164 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1165 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1166 # DuckDB should render RESPECT NULLS only for the general-purpose 1167 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1168 return super().respectnulls_sql(expression) 1169 1170 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1171 return self.sql(expression, "this") 1172 1173 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1174 this = self.sql(expression, "this") 1175 null_text = self.sql(expression, "null") 1176 1177 if null_text: 1178 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1179 1180 return self.func("ARRAY_TO_STRING", this, expression.expression) 1181 1182 @unsupported_args("position", "occurrence") 1183 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1184 group = expression.args.get("group") 1185 params = expression.args.get("parameters") 1186 1187 # Do not render group if there is no following argument, 1188 # and it's the default value for this dialect 1189 if ( 1190 not params 1191 and group 1192 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1193 ): 1194 group = None 1195 return self.func( 1196 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1197 ) 1198 1199 @unsupported_args("culture") 1200 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1201 fmt = expression.args.get("format") 1202 if fmt and fmt.is_int: 1203 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1204 1205 self.unsupported("Only integer formats are supported by NumberToStr") 1206 return self.function_fallback_sql(expression) 1207 1208 def autoincrementcolumnconstraint_sql(self, _) -> str: 1209 self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB") 1210 return "" 1211 1212 def aliases_sql(self, expression: exp.Aliases) -> str: 1213 this = expression.this 1214 if isinstance(this, exp.Posexplode): 1215 return self.posexplode_sql(this) 1216 1217 return super().aliases_sql(expression) 1218 1219 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1220 this = expression.this 1221 parent = expression.parent 1222 1223 # The default Spark aliases are "pos" and "col", unless specified otherwise 1224 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1225 1226 if isinstance(parent, exp.Aliases): 1227 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1228 pos, col = parent.expressions 1229 elif isinstance(parent, exp.Table): 1230 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1231 alias = parent.args.get("alias") 1232 if alias: 1233 pos, col = alias.columns or [pos, col] 1234 alias.pop() 1235 1236 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1237 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1238 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1239 gen_subscripts = self.sql( 1240 exp.Alias( 1241 this=exp.Anonymous( 1242 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1243 ) 1244 - exp.Literal.number(1), 1245 alias=pos, 1246 ) 1247 ) 1248 1249 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1250 1251 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1252 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1253 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1254 1255 return posexplode_sql 1256 1257 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1258 this = expression.this 1259 1260 if not this.type: 1261 from sqlglot.optimizer.annotate_types import annotate_types 1262 1263 this = annotate_types(this, dialect=self.dialect) 1264 1265 if this.is_type(*exp.DataType.TEXT_TYPES): 1266 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1267 1268 func = self.func( 1269 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1270 ) 1271 1272 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1273 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1274 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1275 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1276 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1277 return self.sql(exp.Cast(this=func, to=this.type)) 1278 1279 return self.sql(func)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHEREclause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
935 def lambda_sql( 936 self, expression: exp.Lambda, arrow_sep: str = "->", wrap: bool = True 937 ) -> str: 938 if expression.args.get("colon"): 939 prefix = "LAMBDA " 940 arrow_sep = ":" 941 wrap = False 942 else: 943 prefix = "" 944 945 lambda_sql = super().lambda_sql(expression, arrow_sep=arrow_sep, wrap=wrap) 946 return f"{prefix}{lambda_sql}"
960 def strtodate_sql(self, expression: exp.StrToDate) -> str: 961 if expression.args.get("safe"): 962 formatted_time = self.format_time(expression) 963 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 964 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
972 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 973 nano = expression.args.get("nano") 974 if nano is not None: 975 expression.set( 976 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 977 ) 978 979 return rename_func("MAKE_TIME")(self, expression)
981 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 982 sec = expression.args["sec"] 983 984 milli = expression.args.get("milli") 985 if milli is not None: 986 sec += milli.pop() / exp.Literal.number(1000.0) 987 988 nano = expression.args.get("nano") 989 if nano is not None: 990 sec += nano.pop() / exp.Literal.number(1000000000.0) 991 992 if milli or nano: 993 expression.set("sec", sec) 994 995 return rename_func("MAKE_TIMESTAMP")(self, expression)
997 def tablesample_sql( 998 self, 999 expression: exp.TableSample, 1000 tablesample_keyword: t.Optional[str] = None, 1001 ) -> str: 1002 if not isinstance(expression.parent, exp.Select): 1003 # This sample clause only applies to a single source, not the entire resulting relation 1004 tablesample_keyword = "TABLESAMPLE" 1005 1006 if expression.args.get("size"): 1007 method = expression.args.get("method") 1008 if method and method.name.upper() != "RESERVOIR": 1009 self.unsupported( 1010 f"Sampling method {method} is not supported with a discrete sample count, " 1011 "defaulting to reservoir sampling" 1012 ) 1013 expression.set("method", exp.var("RESERVOIR")) 1014 1015 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
1022 def join_sql(self, expression: exp.Join) -> str: 1023 if ( 1024 expression.side == "LEFT" 1025 and not expression.args.get("on") 1026 and isinstance(expression.this, exp.Unnest) 1027 ): 1028 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 1029 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 1030 return super().join_sql(expression.on(exp.true())) 1031 1032 return super().join_sql(expression)
1048 def bracket_sql(self, expression: exp.Bracket) -> str: 1049 if self.dialect.version >= Version("1.2"): 1050 return super().bracket_sql(expression) 1051 1052 # https://duckdb.org/2025/02/05/announcing-duckdb-120.html#breaking-changes 1053 this = expression.this 1054 if isinstance(this, exp.Array): 1055 this.replace(exp.paren(this)) 1056 1057 bracket = super().bracket_sql(expression) 1058 1059 if not expression.args.get("returns_list_for_maps"): 1060 if not this.type: 1061 from sqlglot.optimizer.annotate_types import annotate_types 1062 1063 this = annotate_types(this, dialect=self.dialect) 1064 1065 if this.is_type(exp.DataType.Type.MAP): 1066 bracket = f"({bracket})[1]" 1067 1068 return bracket
1070 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 1071 expression_sql = self.sql(expression, "expression") 1072 1073 func = expression.this 1074 if isinstance(func, exp.PERCENTILES): 1075 # Make the order key the first arg and slide the fraction to the right 1076 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 1077 order_col = expression.find(exp.Ordered) 1078 if order_col: 1079 func.set("expression", func.this) 1080 func.set("this", order_col.this) 1081 1082 this = self.sql(expression, "this").rstrip(")") 1083 1084 return f"{this}{expression_sql})"
1086 def length_sql(self, expression: exp.Length) -> str: 1087 arg = expression.this 1088 1089 # Dialects like BQ and Snowflake also accept binary values as args, so 1090 # DDB will attempt to infer the type or resort to case/when resolution 1091 if not expression.args.get("binary") or arg.is_string: 1092 return self.func("LENGTH", arg) 1093 1094 if not arg.type: 1095 from sqlglot.optimizer.annotate_types import annotate_types 1096 1097 arg = annotate_types(arg, dialect=self.dialect) 1098 1099 if arg.is_type(*exp.DataType.TEXT_TYPES): 1100 return self.func("LENGTH", arg) 1101 1102 # We need these casts to make duckdb's static type checker happy 1103 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 1104 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 1105 1106 case = ( 1107 exp.case(self.func("TYPEOF", arg)) 1108 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 1109 .else_( 1110 exp.Anonymous(this="LENGTH", expressions=[varchar]) 1111 ) # anonymous to break length_sql recursion 1112 ) 1113 1114 return self.sql(case)
1116 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 1117 this = expression.this 1118 key = expression.args.get("key") 1119 key_sql = key.name if isinstance(key, exp.Expression) else "" 1120 value_sql = self.sql(expression, "value") 1121 1122 kv_sql = f"{key_sql} := {value_sql}" 1123 1124 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 1125 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 1126 if isinstance(this, exp.Struct) and not this.expressions: 1127 return self.func("STRUCT_PACK", kv_sql) 1128 1129 return self.func("STRUCT_INSERT", this, kv_sql)
1131 def unnest_sql(self, expression: exp.Unnest) -> str: 1132 explode_array = expression.args.get("explode_array") 1133 if explode_array: 1134 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 1135 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 1136 expression.expressions.append( 1137 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 1138 ) 1139 1140 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 1141 alias = expression.args.get("alias") 1142 if isinstance(alias, exp.TableAlias): 1143 expression.set("alias", None) 1144 if alias.columns: 1145 alias = exp.TableAlias(this=seq_get(alias.columns, 0)) 1146 1147 unnest_sql = super().unnest_sql(expression) 1148 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 1149 return self.sql(select) 1150 1151 return super().unnest_sql(expression)
1153 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 1154 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1155 # DuckDB should render IGNORE NULLS only for the general-purpose 1156 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 1157 return super().ignorenulls_sql(expression) 1158 1159 if not isinstance(expression.this, exp.AnyValue): 1160 self.unsupported("IGNORE NULLS is not supported for non-window functions.") 1161 1162 return self.sql(expression, "this")
1164 def respectnulls_sql(self, expression: exp.RespectNulls) -> str: 1165 if isinstance(expression.this, self.IGNORE_RESPECT_NULLS_WINDOW_FUNCTIONS): 1166 # DuckDB should render RESPECT NULLS only for the general-purpose 1167 # window functions that accept it e.g. FIRST_VALUE(... RESPECT NULLS) OVER (...) 1168 return super().respectnulls_sql(expression) 1169 1170 self.unsupported("RESPECT NULLS is not supported for non-window functions.") 1171 return self.sql(expression, "this")
1173 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 1174 this = self.sql(expression, "this") 1175 null_text = self.sql(expression, "null") 1176 1177 if null_text: 1178 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 1179 1180 return self.func("ARRAY_TO_STRING", this, expression.expression)
1182 @unsupported_args("position", "occurrence") 1183 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 1184 group = expression.args.get("group") 1185 params = expression.args.get("parameters") 1186 1187 # Do not render group if there is no following argument, 1188 # and it's the default value for this dialect 1189 if ( 1190 not params 1191 and group 1192 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 1193 ): 1194 group = None 1195 return self.func( 1196 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 1197 )
1199 @unsupported_args("culture") 1200 def numbertostr_sql(self, expression: exp.NumberToStr) -> str: 1201 fmt = expression.args.get("format") 1202 if fmt and fmt.is_int: 1203 return self.func("FORMAT", f"'{{:,.{fmt.name}f}}'", expression.this) 1204 1205 self.unsupported("Only integer formats are supported by NumberToStr") 1206 return self.function_fallback_sql(expression)
1219 def posexplode_sql(self, expression: exp.Posexplode) -> str: 1220 this = expression.this 1221 parent = expression.parent 1222 1223 # The default Spark aliases are "pos" and "col", unless specified otherwise 1224 pos, col = exp.to_identifier("pos"), exp.to_identifier("col") 1225 1226 if isinstance(parent, exp.Aliases): 1227 # Column case: SELECT POSEXPLODE(col) [AS (a, b)] 1228 pos, col = parent.expressions 1229 elif isinstance(parent, exp.Table): 1230 # Table case: SELECT * FROM POSEXPLODE(col) [AS (a, b)] 1231 alias = parent.args.get("alias") 1232 if alias: 1233 pos, col = alias.columns or [pos, col] 1234 alias.pop() 1235 1236 # Translate POSEXPLODE to UNNEST + GENERATE_SUBSCRIPTS 1237 # Note: In Spark pos is 0-indexed, but in DuckDB it's 1-indexed, so we subtract 1 from GENERATE_SUBSCRIPTS 1238 unnest_sql = self.sql(exp.Unnest(expressions=[this], alias=col)) 1239 gen_subscripts = self.sql( 1240 exp.Alias( 1241 this=exp.Anonymous( 1242 this="GENERATE_SUBSCRIPTS", expressions=[this, exp.Literal.number(1)] 1243 ) 1244 - exp.Literal.number(1), 1245 alias=pos, 1246 ) 1247 ) 1248 1249 posexplode_sql = self.format_args(gen_subscripts, unnest_sql) 1250 1251 if isinstance(parent, exp.From) or (parent and isinstance(parent.parent, exp.From)): 1252 # SELECT * FROM POSEXPLODE(col) -> SELECT * FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...)) 1253 return self.sql(exp.Subquery(this=exp.Select(expressions=[posexplode_sql]))) 1254 1255 return posexplode_sql
1257 def addmonths_sql(self, expression: exp.AddMonths) -> str: 1258 this = expression.this 1259 1260 if not this.type: 1261 from sqlglot.optimizer.annotate_types import annotate_types 1262 1263 this = annotate_types(this, dialect=self.dialect) 1264 1265 if this.is_type(*exp.DataType.TEXT_TYPES): 1266 this = exp.Cast(this=this, to=exp.DataType(this=exp.DataType.Type.TIMESTAMP)) 1267 1268 func = self.func( 1269 "DATE_ADD", this, exp.Interval(this=expression.expression, unit=exp.var("MONTH")) 1270 ) 1271 1272 # DuckDB's DATE_ADD function returns TIMESTAMP/DATETIME by default, even when the input is DATE 1273 # To match for example Snowflake's ADD_MONTHS behavior (which preserves the input type) 1274 # We need to cast the result back to the original type when the input is DATE or TIMESTAMPTZ 1275 # Example: ADD_MONTHS('2023-01-31'::date, 1) should return DATE, not TIMESTAMP 1276 if this.is_type(exp.DataType.Type.DATE, exp.DataType.Type.TIMESTAMPTZ): 1277 return self.sql(exp.Cast(this=func, to=this.type)) 1278 1279 return self.sql(func)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- SUPPORTS_BETWEEN_FLAGS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql