diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index b13ddd083..fae5e3e46 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -108,6 +108,7 @@ class DatabaseDialect(Enum): ANSI = "ansi" SQLITE = "sqlite" SNOWFLAKE = "snowflake" + TRINO = "trino" MYSQL = "mysql" POSTGRES = "postgres" BODOSQL = "bodosql" diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index e1e92e7e4..71d026d20 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -14,6 +14,7 @@ from sqlglot.dialects import Postgres as PostgresDialect from sqlglot.dialects import Snowflake as SnowflakeDialect from sqlglot.dialects import SQLite as SQLiteDialect +from sqlglot.dialects import Trino as TrinoDialect from sqlglot.dialects.mysql import MySQL from sqlglot.errors import SqlglotError from sqlglot.expressions import ( @@ -486,6 +487,8 @@ def convert_dialect_to_sqlglot(dialect: DatabaseDialect) -> SQLGlotDialect: # The BodoSQL dialect is essentially a subset of the Snowflake SQL # dialect without many of the extraneous features. 
return SnowflakeDialect() + case DatabaseDialect.TRINO: + return TrinoDialect() case DatabaseDialect.MYSQL: return MySQLDialect() case DatabaseDialect.POSTGRES: diff --git a/pydough/sqlglot/transform_bindings/__init__.py b/pydough/sqlglot/transform_bindings/__init__.py index af970d134..ad51044ff 100644 --- a/pydough/sqlglot/transform_bindings/__init__.py +++ b/pydough/sqlglot/transform_bindings/__init__.py @@ -10,6 +10,7 @@ "PostgresTransformBindings", "SQLiteTransformBindings", "SnowflakeTransformBindings", + "TrinoTransformBindings", "bindings_from_dialect", ] @@ -24,6 +25,7 @@ from .postgres_transform_bindings import PostgresTransformBindings from .sf_transform_bindings import SnowflakeTransformBindings from .sqlite_transform_bindings import SQLiteTransformBindings +from .trino_transform_bindings import TrinoTransformBindings if TYPE_CHECKING: from pydough.sqlglot.sqlglot_relational_visitor import SQLGlotRelationalVisitor @@ -53,6 +55,8 @@ def bindings_from_dialect( return SQLiteTransformBindings(configs, visitor) case DatabaseDialect.SNOWFLAKE: return SnowflakeTransformBindings(configs, visitor) + case DatabaseDialect.TRINO: + return TrinoTransformBindings(configs, visitor) case DatabaseDialect.BODOSQL: return BodoSQLTransformBindings(configs, visitor) case DatabaseDialect.MYSQL: diff --git a/pydough/sqlglot/transform_bindings/trino_transform_bindings.py b/pydough/sqlglot/transform_bindings/trino_transform_bindings.py new file mode 100644 index 000000000..92b1c7104 --- /dev/null +++ b/pydough/sqlglot/transform_bindings/trino_transform_bindings.py @@ -0,0 +1,134 @@ +""" +Definition of SQLGlot transformation bindings for the Trino dialect. 
+""" + +__all__ = ["TrinoTransformBindings"] + + +import sqlglot.expressions as sqlglot_expressions +from sqlglot.expressions import Expression as SQLGlotExpression + +import pydough.pydough_operators as pydop +from pydough.configs import DayOfWeek +from pydough.types import PyDoughType + +from .base_transform_bindings import BaseTransformBindings +from .sqlglot_transform_utils import DateTimeUnit, apply_parens + + +class TrinoTransformBindings(BaseTransformBindings): + """ + Subclass of BaseTransformBindings for the Trino dialect. + """ + + @property + def values_alias_column(self) -> bool: + return False + + PYDOP_TO_TRINO_FUNC: dict[pydop.PyDoughExpressionOperator, str] = { + pydop.STARTSWITH: "STARTS_WITH", + pydop.LPAD: "LPAD", + pydop.RPAD: "RPAD", + pydop.SIGN: "SIGN", + pydop.SMALLEST: "LEAST", + pydop.LARGEST: "GREATEST", + pydop.GETPART: "SPLIT_PART", + } + """ + Mapping of PyDough operators to equivalent Trino SQL function names + These are used to generate anonymous function calls in SQLGlot + """ + + @property + def dialect_start_of_week(self) -> DayOfWeek: + """ + Which day of the week is considered the start of the week within the + SQL dialect. Individual dialects may override this. 
+ """ + return DayOfWeek.MONDAY + + @property + def dialect_dow_mapping(self) -> dict[str, int]: + return { + "Monday": 1, + "Tuesday": 2, + "Wednesday": 3, + "Thursday": 4, + "Friday": 5, + "Saturday": 6, + "Sunday": 7, + } + + def convert_call_to_sqlglot( + self, + operator: pydop.PyDoughExpressionOperator, + args: list[SQLGlotExpression], + types: list[PyDoughType], + ) -> SQLGlotExpression: + if operator in self.PYDOP_TO_TRINO_FUNC: + return sqlglot_expressions.Anonymous( + this=self.PYDOP_TO_TRINO_FUNC[operator], expressions=args + ) + + return super().convert_call_to_sqlglot(operator, args, types) + + def convert_extract_datetime( + self, + args: list[SQLGlotExpression], + types: list[PyDoughType], + unit: DateTimeUnit, + ) -> SQLGlotExpression: + # Update argument type to fit datetime + dt_expr: SQLGlotExpression = self.handle_datetime_base_arg(args[0]) + func_expr: SQLGlotExpression + match unit: + case DateTimeUnit.YEAR: + func_expr = sqlglot_expressions.Year(this=dt_expr) + case DateTimeUnit.QUARTER: + func_expr = sqlglot_expressions.Quarter(this=dt_expr) + case DateTimeUnit.MONTH: + func_expr = sqlglot_expressions.Month(this=dt_expr) + case DateTimeUnit.DAY: + func_expr = sqlglot_expressions.Day(this=dt_expr) + case DateTimeUnit.HOUR | DateTimeUnit.MINUTE | DateTimeUnit.SECOND: + func_expr = sqlglot_expressions.Anonymous( + this=unit.value.upper(), expressions=[dt_expr] + ) + return func_expr + + def apply_datetime_truncation( + self, base: SQLGlotExpression, unit: DateTimeUnit + ) -> SQLGlotExpression: + if unit is DateTimeUnit.WEEK: + # 1. Get shifted_weekday (# of days since the start of week) + # 2. Subtract shifted_weekday DAYS from the datetime + # 3. 
Truncate the result to the nearest day + shifted_weekday: SQLGlotExpression = self.days_from_start_of_week(base) + date_sub: SQLGlotExpression = sqlglot_expressions.DateSub( + this=base, + expression=shifted_weekday, + unit=sqlglot_expressions.Var(this="DAY"), + ) + return sqlglot_expressions.DateTrunc( + this=date_sub, + unit=sqlglot_expressions.Var(this="DAY"), + ) + else: + # For other units, use the standard SQLGlot truncation + return super().apply_datetime_truncation(base, unit) + + def days_from_start_of_week(self, base: SQLGlotExpression) -> SQLGlotExpression: + offset: int = (-self.start_of_week_offset) % 7 + dow_expr: SQLGlotExpression = self.dialect_day_of_week(base) + if offset == 1: + return dow_expr + breakpoint() + return sqlglot_expressions.Mod( + this=apply_parens( + sqlglot_expressions.Add( + this=dow_expr, + expression=sqlglot_expressions.Literal.number(offset - 1), + ) + ), + expression=sqlglot_expressions.Literal.number(7), + ) diff --git a/pyproject.toml b/pyproject.toml index 024fcd13f..261a33656 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dev-dependencies = [ "pytest-repeat", "boto3", "pydough[snowflake]", + "pydough[trino]", "pydough[mysql]", "pydough[postgres]", "pydough[server]", @@ -42,6 +43,7 @@ dev-dependencies = [ [project.optional-dependencies] snowflake = ["snowflake-connector-python[pandas]==4.1.1"] +trino = ["trino"] mysql = ["mysql-connector-python==9.5.0"] postgres = ["psycopg2-binary"] server = ["fastapi", "httpx", "uvicorn"] diff --git a/pytest.ini b/pytest.ini index 47be5edaa..9cde8a7a3 100644 --- a/pytest.ini +++ b/pytest.ini @@ -2,6 +2,7 @@ markers = execute: marks tests that do runtime execution (deselect with '-m "not execute"') snowflake: marks tests that require Snowflake credentials + trino: marks tests that require Trino credentials mysql: marks tests that require MySQL credentials postgres: marks tests that require PostgresSQL credentials server: marks tests that require api mock server diff 
@pytest.fixture(scope="session")
def trino_graph_path() -> str:
    """
    Path to the JSON file containing the Trino graphs, located in the
    `test_metadata` folder next to this file.
    """
    tests_dir: str = os.path.dirname(__file__)
    return f"{tests_dir}/test_metadata/trino_graphs.json"
def is_trino_env_set() -> bool:
    """
    Check if the Trino environment variables are set.

    Returns:
        bool: True if at least one required Trino environment variable is
        declared and every declared variable is set to a non-empty value,
        False otherwise.
    """
    # TODO: add environment variables for Trino connection
    required_envs: list[str] = []
    # `all()` over an empty iterable is True, so an empty requirements list
    # must be reported as "not set"; otherwise callers never skip and go on
    # to open a connection with no connection arguments.
    if not required_envs:
        return False
    return all(os.getenv(env) for env in required_envs)
diff --git a/tests/test_metadata/trino_graphs.json b/tests/test_metadata/trino_graphs.json new file mode 100644 index 000000000..1179e4052 --- /dev/null +++ b/tests/test_metadata/trino_graphs.json @@ -0,0 +1,5541 @@ +[ + { + "name": "TPCH", + "version": "V2", + "collections": [ + { + "name": "regions", + "type": "simple table", + "table path": "tpch.REGION", + "unique properties": ["key", "name"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "r_regionkey", + "data type": "numeric", + "description": "Unique identifier id for the region", + "sample values": [0, 1, 2, 3, 4], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "r_name", + "data type": "string", + "description": "Uppercase name of the region", + "sample values": ["AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST"] + }, + { + "name": "comment", + "type": "table column", + "column name": "r_comment", + "data type": "string", + "description": "Comment/remark on the region" + } + ], + "description": "The regions of the world", + "synonyms": ["continents", "segments of the world"] + }, + { + "name": "nations", + "type": "simple table", + "table path": "tpch.NATION", + "unique properties": ["key", "name"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "n_nationkey", + "data type": "numeric", + "description": "Unique identifier id for the nation", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["id"] + }, + { + "name": "region_key", + "type": "table column", + "column name": "n_regionkey", + "data type": "numeric", + "description": "Key from the region that the nation belongs to", + "sample values": [0, 1, 2, 3, 4] + }, + { + "name": "name", + "type": "table column", + "column name": "n_name", + "data type": "string", + "description": "Uppercase name of the nation", + "sample values": ["KENYA", "PERU", "JAPAN", "INDIA", "GERMANY"] + }, + { + "name": "comment", + "type": "table column", + 
"column name": "n_comment", + "data type": "string", + "description": "Comment/remark on the nation" + } + ], + "description": "The nations of the world", + "synonyms": ["countries", "states"] + }, + { + "name": "parts", + "type": "simple table", + "table path": "tpch.PART", + "unique properties": ["key", "name"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "p_partkey", + "data type": "numeric", + "description": "Unique identifier id for the part", + "sample values": [0, 103719, 114994, 64760, 2440], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "p_name", + "data type": "string", + "description": "Name of the part, consisting of multiple lowercase colors", + "sample values": ["chiffon plum white linen firebrick", "chocolate steel antique green lavender", "sky frosted cream light blush"], + "synonyms": ["colors", "description"] + }, + { + "name": "manufacturer", + "type": "table column", + "column name": "p_mfgr", + "data type": "string", + "description": "Name of the manufacturer of the part (not the same as the suppliers). The manufacturer number is the same as the first digit of the part's brand number.", + "sample values": ["Manufacturer#1", "Manufacturer#2", "Manufacturer#3", "Manufacturer#4", "Manufacturer#5"] + }, + { + "name": "brand", + "type": "table column", + "column name": "p_brand", + "data type": "string", + "description": "The production brand that the part belongs to. The brand contains two digits where the first digit is the same as the manufacturer number (e.g. Brand#32 is from Manufacturer #3).", + "sample values": ["Brand#11", "Brand#23", "Brand#34", "Brand#45", "Brand#55"] + }, + { + "name": "part_type", + "type": "table column", + "column name": "p_type", + "data type": "string", + "description": "The type of the part, consisting of three uppercase descriptors where the first is a category (e.g. 'STANDARD' or 'PROMO'), the second is a processing state (e.g. 
'ANODIZED' or 'PLATED') and the third is a material (e.g. 'STEEL' or 'BRASS').", + "sample values": ["ECONOMY ANODIZED NICKEL", "PROMO BURNISHED COPPER", "STANDARD BRUSHED STEEL", "SMALL PLATED TIN", "LARGE POLISHED BRASS", "MEDIUM PLATED NICKEL"], + "synonyms": ["category", "descriptor", "processing", "material"] + }, + { + "name": "size", + "type": "table column", + "column name": "p_size", + "data type": "numeric", + "description": "The size of the part", + "sample values": [1, 10, 31, 46, 50], + "synonyms": ["dimension", "measurement", "length", "width", "height", "volume"], + "extra semantic info": { + "minimum value": 1, + "maximum value": 50, + "is dense": true, + "distinct values": 50, + "correlated fields": [] + } + }, + { + "name": "container", + "type": "table column", + "column name": "p_container", + "data type": "string", + "description": "The container that the part is stored in. The container consists of two uppercase descriptors where the first is a size (e.g. 'SM' or 'JUMBO') and the second is a type of container (e.g. 'BOX' or 'JAR').", + "sample values": ["SM CASE", "LG BOX", "MED BAG", "JUMBO JAR", "WRAP PKG", "SM PACK", "LG CAN", "MED DRUM"], + "synonyms": ["vessel", "packaging", "receptacle"] + }, + { + "name": "retail_price", + "type": "table column", + "column name": "p_retailprice", + "data type": "numeric", + "description": "The retail price of the part, which it is intended to be sold for before accounting for the price the supplier charges, in US dollars. 
The price is rounded to the nearest cent, and most of the values are between $900 and $2000.", + "sample values": [901.00, 2098.99, 14499.50, 2080.99, 2050.96, 1476.41], + "synonyms": ["listed selling price", "wholesale value"] + }, + { + "name": "comment", + "type": "table column", + "column name": "p_comment", + "data type": "string", + "description": "Description/commentary on the part" + } + ], + "description": "The various products supplied by various companies in shipments to different customers", + "synonyms": ["products", "components", "items", "goods"], + "extra semantic info": { + "nrows": 200000, + "distinct values": { + "key": 200000, + "name": 200000, + "manufacturer": 5, + "brand": 25, + "part_type": 150, + "size": 50, + "container": 40, + "retail_price": 20899, + "comment": 131753 + }, + "correlations": { + "brand": "each brand is associated with exactly one manufacturer, and each manufacturer has exactly 5 distinct brands" + } + } + }, + { + "name": "suppliers", + "type": "simple table", + "table path": "tpch.SUPPLIER", + "unique properties": ["key", "name", "phone", "address"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "s_suppkey", + "data type": "numeric", + "description": "Unique identifier id for the supplier", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "s_name", + "data type": "string", + "description": "Name of the supplier, which is always Supplier# where the number is the same as the supplier's key, prepended with zeros until it is 9 digits", + "sample values": ["Supplier#000008427", "Supplier#000001917", "Supplier#000000001", "Supplier#000010000", "Supplier#000000893"] + }, + { + "name": "address", + "type": "table column", + "column name": "s_address", + "data type": "string", + "description": "Address of the supplier as a cryptographically encrypted string to anonymize the data.", + "sample values": 
["aSYD1SvrdIGV8LxRL QDp5m9dV", "ydl44utgudl6CP46TF7kliIcF5sC8K9,WH,Tj", "J1Vd3lqn1UvN2|4|14-632-452-6847"], + "synonyms": ["location", "street address", "corporate address", "headquarters"] + }, + { + "name": "nation_key", + "type": "table column", + "column name": "s_nationkey", + "data type": "numeric", + "description": "Key from the nation that the supplier belongs to", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["nation id"] + }, + { + "name": "phone", + "type": "table column", + "column name": "s_phone", + "data type": "string", + "description": "Phone number of the supplier in the format 'CC-XXX-XXX-XXXX' where CC is the country code (each nation has a unique country code).", + "sample values": ["25-995-176-6622", "18-132-649-2520", "30-505-249-4504", "10-132-649-2520", "27-599-541-3605"], + "synonyms": ["contact number", "telephone number"] + }, + { + "name": "account_balance", + "type": "table column", + "column name": "s_acctbal", + "data type": "numeric", + "description": "The account balance of the supplier in US dollars. 
The balance is rounded to the nearest cent and most of the values are between -$1,000 and +$10,000.", + "sample values": [-998.22, 9999.72, 4510.35, 9125.21, -0.92, 58.93], + "synonyms": ["balance", "credit", "wealth", "debt", "surplus", "cash on hand", "money in bank"] + }, + { + "name": "comment", + "type": "table column", + "column name": "s_comment", + "data type": "string", + "description": "Commentary/remark on the supplier" + } + ], + "description": "The various companies that supply different parts to fulfill purchase orders", + "synonyms": ["companies", "businesses", "vendors"] + }, + { + "name": "lines", + "type": "simple table", + "table path": "tpch.LINEITEM", + "unique properties": [["order_key", "line_number"]], + "properties": [ + { + "name": "order_key", + "type": "table column", + "column name": "l_orderkey", + "data type": "numeric", + "description": "Key from the order that the line item belongs to", + "sample values": [5294597, 19010, 68581, 2710114, 2462791], + "synonyms": ["order id"] + }, + { + "name": "part_key", + "type": "table column", + "column name": "l_partkey", + "data type": "numeric", + "description": "Key from the part that the lineitem describes a purchase/shipment of", + "sample values": [1, 103719, 114994, 64760, 2440], + "synonyms": ["part id"] + }, + { + "name": "supplier_key", + "type": "table column", + "column name": "l_suppkey", + "data type": "numeric", + "description": "Key from the supplier that the lineitem describes a purchase/shipment from", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["supplier id"] + }, + { + "name": "line_number", + "type": "table column", + "column name": "l_linenumber", + "data type": "numeric", + "description": "The line number of the lineitem within the order. 
Each lineitem within an order has its own line number, and represents a purchase of a part from a supplier within the order.", + "sample values": [1, 2, 3, 4, 5, 6, 7], + "synonyms": ["line id", "shipment index within order"] + }, + { + "name": "quantity", + "type": "table column", + "column name": "l_quantity", + "data type": "numeric", + "description": "The number of units of the part that is being purchased in the lineitem, as a number between 1 and 50", + "sample values": [1, 10, 13, 25, 48, 50], + "synonyms": ["amount", "purchase volume", "units", "count", "number of items", "shipment size"] + }, + { + "name": "extended_price", + "type": "table column", + "column name": "l_extendedprice", + "data type": "numeric", + "description": "The extended price of the line item, which is the retail price of the part multiplied by the quantity purchased (before any discounts/taxes are applied). The price is rounded to the nearest cent and most of the values are between $900 and $100,000.", + "sample values": [901.00, 36036.00, 57657.60, 50450.4, 39097.8], + "synonyms": ["raw price", "gross cost", "total value before discount/tax"] + }, + { + "name": "discount", + "type": "table column", + "column name": "l_discount", + "data type": "numeric", + "description": "The discount applied to the line item, which is a ratio between 0 and 1 representing percentage of the extended price. The percentage is always between 0% (0.00) and 10% (0.10)", + "sample values": [0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10], + "synonyms": ["markdown", "price reduction"] + }, + { + "name": "tax", + "type": "table column", + "column name": "l_tax", + "data type": "numeric", + "description": "The sales tax applied to the line item, which is a ratio between 0 and 1 representing percentage of the extended price. 
The percentage is always between 0% (0.00) and 8% (0.08)", + "sample values": [0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08], + "synonyms": ["levy", "duty", "tariff"] + }, + { + "name": "status", + "type": "table column", + "column name": "l_linestatus", + "data type": "string", + "description": "The status of the line item, which is always 'O' (for orders that have been placed but not yet filled, e.g. pending shipment) or 'F' (for orders that have been filled, e.g. already shipped)", + "sample values": ["O", "F"], + "synonyms": ["pending shipment", "shipment state", "fulfilled"] + }, + { + "name": "ship_date", + "type": "table column", + "column name": "l_shipdate", + "data type": "datetime", + "description": "The date that the line item was shipped from the supplier/warehouse. The date is always between 1992-01-01 and 1998-12-31" + }, + { + "name": "commit_date", + "type": "table column", + "column name": "l_commitdate", + "data type": "datetime", + "description": "The date that the line item was committed to be shipped from the supplier/warehouse. The date is always between 1992-01-01 and 1998-12-31. The ship date is ideally before or on the actual commit date, but can be after it if the shipment was delayed." + }, + { + "name": "receipt_date", + "type": "table column", + "column name": "l_receiptdate", + "data type": "datetime", + "description": "The date that the line item was received by the customer. The date is always between 1992-01-01 and 1998-12-31. The receipt date is after the ship date due to the time to ship the package." 
+ }, + { + "name": "ship_instruct", + "type": "table column", + "column name": "l_shipinstruct", + "data type": "string", + "description": "The shipping instructions for the line item, which is always 'DELIVER IN PERSON', 'TAKE BACK RETURN', 'COLLECT COD' or 'NONE'", + "sample values": ["DELIVER IN PERSON", "TAKE BACK RETURN", "COLLECT COD", "NONE"], + "synonyms": ["shipping instructions", "delivery instructions"] + }, + { + "name": "ship_mode", + "type": "table column", + "column name": "l_shipmode", + "data type": "string", + "description": "The shipping mode for the line item, which is always 'AIR', 'AIR REG', 'FOB', 'MAIL' or 'SHIP'", + "sample values": ["AIR", "AIR REG", "FOB", "MAIL", "SHIP"], + "synonyms": ["shipping method", "delivery method"] + }, + { + "name": "return_flag", + "type": "table column", + "column name": "l_returnflag", + "data type": "string", + "description": "The return flag for the line item, which is always 'R' (for returned items) or 'N' (for non-returned items)", + "sample values": ["R", "N"], + "synonyms": ["return status"] + }, + { + "name": "comment", + "type": "table column", + "column name": "l_comment", + "data type": "string", + "description": "Commentary/remark on the line item" + } + ], + "description": "The line items for shipments within an order. Each line item within an order has its own line number, and represents a purchase of a part from a supplier within the order. 
The order can contain multiple lineitems from different suppliers for different parts, and the lineitems can each have their own shipping information.", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "order components", "order elements"] + }, + { + "name": "supply_records", + "type": "simple table", + "table path": "tpch.PARTSUPP", + "unique properties": [["part_key", "supplier_key"]], + "properties": [ + { + "name": "part_key", + "type": "table column", + "column name": "ps_partkey", + "data type": "numeric", + "description": "Key from the part that the supply record belongs to", + "sample values": [1, 103719, 114994, 64760, 2440], + "synonyms": ["part id"] + }, + { + "name": "supplier_key", + "type": "table column", + "column name": "ps_suppkey", + "data type": "numeric", + "description": "Key from the supplier that the supply record belongs to", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["supplier id"] + }, + { + "name": "available_quantity", + "type": "table column", + "column name": "ps_availqty", + "data type": "numeric", + "description": "The number of units of the part that the supplier has available to supply. The quantity is usually between 1 and 1,000", + "sample values": [4620, 3100, 6547, 3337, 76], + "synonyms": ["available stock", "inventory", "supply", "quantity remaining"] + }, + { + "name": "supply_cost", + "type": "table column", + "column name": "ps_supplycost", + "data type": "numeric", + "description": "The cost that it takes for the supplier to produce a single unit of the part. 
The cost is rounded to the nearest cent and most of the values are between $1 and $1,000.", + "sample values": [144.43, 772.21, 285.90, 50.12, 983.998], + "synonyms": ["production cost", "manufacturing cost", "cost of goods sold"] + }, + { + "name": "comment", + "type": "table column", + "column name": "ps_comment", + "data type": "string", + "description": "Commentary/remark on the supply record" + } + ], + "description": "Every combination of a supplier and a part that the supplier supplies. Each record contains information about the supplier of the part, the part itself, and the availability of the part from the supplier.", + "synonyms": ["supplier part information", "partsupp info", "manifest", "inventories", "catalog"] + }, + { + "name": "orders", + "type": "simple table", + "table path": "tpch.ORDERS", + "unique properties": ["key"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "o_orderkey", + "data type": "numeric", + "description": "Unique identifier id for the order", + "sample values": [317728, 1096707, 5522855, 2624837, 1866566], + "synonyms": ["id"] + }, + { + "name": "customer_key", + "type": "table column", + "column name": "o_custkey", + "data type": "numeric", + "description": "Key from the customer that placed the order", + "sample values": [93721, 65251, 81379, 20663, 42247], + "synonyms": ["customer id"] + }, + { + "name": "order_status", + "type": "table column", + "column name": "o_orderstatus", + "data type": "string", + "description": "The status of the order, which is always 'O' for orders where all line items of the order have status 'O', 'F' for orders where all line items of the order have status 'F', and 'P' otherwise", + "sample values": ["O", "F", "P"], + "synonyms": ["order state", "fulfillment status"] + }, + { + "name": "total_price", + "type": "table column", + "column name": "o_totalprice", + "data type": "numeric", + "description": "The total price of the order after any discounts/taxes are 
applied, which is the sum of the extended price * (1 - discount) * (1 - tax) for all line items in the order. The price is rounded to the nearest cent and most of the values are between $800 and $600,000.", + "sample values": [857.71, 555285.16, 3618.2, 277554.58, 52737.18], + "synonyms": ["total cost", "total value"] + }, + { + "name": "order_date", + "type": "table column", + "column name": "o_orderdate", + "data type": "datetime", + "description": "The date that the order was placed. The date is always between 1992-01-01 and 1998-12-31", + "synonyms": ["order placed date", "order creation date", "purchase date"] + }, + { + "name": "order_priority", + "type": "table column", + "column name": "o_orderpriority", + "data type": "string", + "description": "The priority of the order, which is always '1-URGENT', '2-HIGH', '3-MEDIUM', '4-NOT SPECIFIED' or '5-LOW'", + "sample values": ["1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW"], + "synonyms": ["urgency", "priority level"] + }, + { + "name": "clerk", + "type": "table column", + "column name": "o_clerk", + "data type": "string", + "description": "The clerk that processed the order, which is always 'Clerk#' where the number is prepended with zeros until it is 9 digits", + "sample values": ["Clerk#000000001", "Clerk#000000090", "Clerk#000000635", "Clerk#000000892", "Clerk#000000073"], + "synonyms": ["salesperson", "representative", "agent", "notary", "officiant", "registrar", "overseer"] + }, + { + "name": "ship_priority", + "type": "table column", + "column name": "o_shippriority", + "data type": "numeric", + "description": "The priority of the order for shipping, which is always 0", + "sample values": [0], + "synonyms": ["shipping priority", "shipment urgency"] + }, + { + "name": "comment", + "type": "table column", + "column name": "o_comment", + "data type": "string", + "description": "Commentary/remark on the order" + } + ], + "description": "The orders that have been placed by customers. 
Each order can contain multiple lineitems from different suppliers for different parts, and the lineitems can each have their own shipping information.", + "synonyms": ["transactions"] + }, + { + "name": "customers", + "type": "simple table", + "table path": "tpch.CUSTOMER", + "unique properties": ["key", "name", "address"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "c_custkey", + "data type": "numeric", + "description": "Unique identifier id for the customer", + "sample values": [93721, 65251, 81379, 20663, 42247], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "c_name", + "data type": "string", + "description": "Name of the customer, which is always Customer# where the number is the same as the customer's key, prepended with zeros until it is 9 digits", + "sample values": ["Customer#000000001", "Customer#000000090", "Customer#000000635", "Customer#000000892", "Customer#000000073"] + }, + { + "name": "address", + "type": "table column", + "column name": "c_address", + "data type": "string", + "description": "Address of the customer as a cryptographically encrypted string to anonymize the data.", + "sample values": ["ZCWMiIFUwRZWX7Vr7BjZ,,BZbGeGOBe7n", "HcpYT5Ag 2I2QY,nSoP5F1LI"], + "synonyms": ["location", "residence", "home address"] + }, + { + "name": "nation_key", + "type": "table column", + "column name": "c_nationkey", + "data type": "numeric", + "description": "Key from the nation that the customer belongs to", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["nation id"] + }, + { + "name": "phone", + "type": "table column", + "column name": "c_phone", + "data type": "string", + "description": "Phone number of the customer in the format 'CC-XXX-XXX-XXXX' where CC is the country code (each nation has a unique country code).", + "sample values": ["19-962-391-7546", "24-413-105-9570", "31-703-857-4846", "34-591-761-1886"], + "synonyms": ["contact number", "telephone 
number"] + }, + { + "name": "account_balance", + "type": "table column", + "column name": "c_acctbal", + "data type": "numeric", + "description": "The account balance of the customer in US dollars. The balance is rounded to the nearest cent and most of the values are between -$1,000 and +$10,000.", + "sample values": [-998.22, 9999.72, 4510.35, 9125.21, -0.92, 58.93], + "synonyms": ["balance", "credit", "wealth", "debt", "surplus", "cash on hand", "money in bank"] + }, + { + "name": "market_segment", + "type": "table column", + "column name": "c_mktsegment", + "data type": "string", + "description": "The market segment that the customer belongs to, which is always 'BUILDING', 'FURNITURE', 'AUTOMOBILE', 'MACHINERY' or 'HOUSEHOLD'", + "sample values": ["BUILDING", "FURNITURE", "AUTOMOBILE", "MACHINERY", "HOUSEHOLD"], + "synonyms": ["customer segment", "customer category", "market", "industry", "sector", "vertical"] + }, + { + "name": "comment", + "type": "table column", + "column name": "c_comment", + "data type": "string", + "description": "Commentary/remark on the customer" + } + ], + "description": "The customers in the system within each nation. 
Each customer can have placed zero, one, or several orders.", + "synonyms": ["citizens", "residents", "inhabitants", "consumers", "users", "buyers", "occupants"] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "nations", + "parent collection": "regions", + "child collection": "nations", + "singular": false, + "always matches": true, + "keys": {"key": ["region_key"]}, + "description": "The nations contained within a region", + "synonyms": ["countries"] + }, + { + "type": "reverse", + "name": "region", + "original parent": "regions", + "original property": "nations", + "singular": true, + "always matches": true, + "description": "The region that a nation is part of", + "synonyms": ["continent", "segment of the world"] + }, + { + "type": "simple join", + "name": "suppliers", + "parent collection": "nations", + "child collection": "suppliers", + "singular": false, + "always matches": true, + "keys": {"key": ["nation_key"]}, + "description": "The suppliers belonging to a nation", + "synonyms": ["companies", "producers", "businesses"] + }, + { + "type": "reverse", + "name": "nation", + "original parent": "nations", + "original property": "suppliers", + "singular": true, + "always matches": true, + "description": "The nation that a supplier belongs to", + "synonyms": ["country", "state"] + }, + { + "type": "simple join", + "name": "customers", + "parent collection": "nations", + "child collection": "customers", + "singular": false, + "always matches": true, + "keys": {"key": ["nation_key"]}, + "description": "The customers belonging to a nation", + "synonyms": ["citizens", "residents", "inhabitants", "consumers", "users", "buyers", "occupants"] + }, + { + "type": "reverse", + "name": "nation", + "original parent": "nations", + "original property": "customers", + "singular": true, + "always matches": true, + "description": "The nation that a customer belongs to", + "synonyms": ["country", "state", "home"] + }, + { + "type": "simple join", + "name": 
"supply_records", + "parent collection": "parts", + "child collection": "supply_records", + "singular": false, + "always matches": true, + "keys": {"key": ["part_key"]}, + "description": "The records indicating which companies supply the part", + "synonyms": ["producers", "vendors", "suppliers of part"] + }, + { + "type": "reverse", + "name": "part", + "original parent": "parts", + "original property": "supply_records", + "singular": true, + "always matches": true, + "description": "The part that a supply record belongs to", + "synonyms": ["product", "item", "component"] + }, + { + "type": "simple join", + "name": "lines", + "parent collection": "parts", + "child collection": "lines", + "singular": false, + "always matches": false, + "keys": {"key": ["part_key"]}, + "description": "The line items for shipments of the part", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "sales"], + "extra semantic info": { + "unmatched rows": 0, + "min matches per row": 9, + "max matches per row": 57, + "avg matches per row": 30.01, + "classification": "one-to-many" + } + }, + { + "type": "reverse", + "name": "part", + "original parent": "parts", + "original property": "lines", + "singular": true, + "always matches": true, + "description": "The part that a line item contains, e.g. 
what part is being shipped as part of an order", + "synonyms": ["product", "item", "component"] + }, + { + "type": "simple join", + "name": "supply_records", + "parent collection": "suppliers", + "child collection": "supply_records", + "singular": false, + "always matches": true, + "keys": {"key": ["supplier_key"]}, + "description": "The records indicating which parts the supplier supplies", + "synonyms": ["product catalog", "inventory", "components supplied"] + }, + { + "type": "reverse", + "name": "supplier", + "original parent": "suppliers", + "original property": "supply_records", + "singular": true, + "always matches": true, + "description": "The supplier that a supply record belongs to", + "synonyms": ["company", "producer", "business"] + }, + { + "type": "simple join", + "name": "lines", + "parent collection": "suppliers", + "child collection": "lines", + "singular": false, + "always matches": false, + "keys": {"key": ["supplier_key"]}, + "description": "The line items for shipments from the supplier, e.g. all purchases made from the supplier", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "sales"] + }, + { + "type": "reverse", + "name": "supplier", + "original parent": "suppliers", + "original property": "lines", + "singular": true, + "always matches": true, + "description": "The supplier that a line item contains, e.g. 
what supplier is the part being purchased from as part of an order", + "synonyms": ["company", "producer", "business"] + }, + { + "type": "simple join", + "name": "part_and_supplier", + "parent collection": "lines", + "child collection": "supply_records", + "singular": true, + "always matches": true, + "keys": {"part_key": ["part_key"], "supplier_key": ["supplier_key"]}, + "description": "The corresponding entry in the supply records detailing more information about the supplier of the purchase and the part that was purchased", + "synonyms": ["supply records", "supplier part information", "partsupp info"] + }, + { + "type": "reverse", + "name": "lines", + "original parent": "lines", + "original property": "part_and_supplier", + "singular": false, + "always matches": false, + "description": "The line item instances of a part/supplier combination being purchased by a customer", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "line items"] + }, + { + "type": "simple join", + "name": "order", + "parent collection": "lines", + "child collection": "orders", + "singular": true, + "always matches": true, + "keys": {"order_key": ["key"]}, + "description": "The order that the line item belongs to" + }, + { + "type": "reverse", + "name": "lines", + "original parent": "lines", + "original property": "order", + "singular": false, + "always matches": true, + "description": "The line items that belong to an order, each representing the purchase of a specific part from a specific supplier", + "synonyms": ["items", "order contents", "entries", "line items"] + }, + { + "type": "simple join", + "name": "customer", + "parent collection": "orders", + "child collection": "customers", + "singular": true, + "always matches": true, + "keys": {"customer_key": ["key"]}, + "description": "The customer that placed the order", + "synonyms": ["buyer", "consumer", "user", "client"] + }, + { + "type": "reverse", + "name": "orders", + "original parent": "orders", + "original 
property": "customer", + "singular": false, + "always matches": false, + "description": "The orders that a customer has placed, each of which contains one or more line items", + "synonyms": ["transactions", "purchases"] + } + ], + "additional definitions": [ + "Revenue for a lineitem is the extended_price * (1 - discount) * (1 - tax) minus quantity * supply_cost from the corresponding supply record", + "A domestic shipment is a lineitem where the customer and supplier are from the same nation", + "Frequent buyers are customers that have placed more than 5 orders in a single year for at least two different years" + ], + "verified pydough analysis": [ + {"question": "How many customers are in China?", "code": "TPCH.CALCULATE(n_chinese_customers=COUNT(customers.WHERE(nation.name == 'CHINA')))"}, + {"question": "What was the most ordered part in 1995, by quantity, by Brazilian customers?", "code": "parts.CALCULATE(name, quantity=SUM(lines.WHERE((YEAR(ship_date) == 1995) & (order.customer.nation.name == 'BRAZIL')).quantity)).TOP_K(1, by=quantity)"}, + {"question": "Who is the wealthiest customer in each nation in Africa?", "code": "nations.WHERE(region.name == 'AFRICA').CALCULATE(nation_name=name, richest_customer=customers.BEST(per='nation', by=account_balance.DESC()).name)"} + ], + "extra semantic info": { + "data source": "TPC-H Benchmark Dataset", + "data generation tool": "TPC-H dbgen tool", + "dataset download link": "https://github.com/lovasoa/TPCH-sqlite/releases/download/v1.0/TPC-H.db", + "schema diagram link": "https://docs.snowflake.com/en/user-guide/sample-data-tpch", + "dataset specification link": "https://www.tpc.org/TPC_Documents_Current_Versions/pdf/TPC-H_v3.0.1.pdf", + "data scale factor": 1, + "intended use": "Simulating decision support systems for complex ad-hoc queries and concurrent data modifications", + "notable characteristics": "Highly normalized schema with multiple tables and relationships, designed to represent a wholesale supplier's 
business environment", + "data description": "Contains information about orders. Every order has one or more lineitems, each representing the purchase and shipment of a specific part from a specific supplier. Each order is placed by a customer, and both customers and suppliers belong to nations which in turn belong to regions. Additionally, there are supply records indicating every combination of a supplier and the parts they supply." + } + }, + { + "name": "Broker", + "version": "V2", + "collections": [ + { + "name": "customers", + "type": "simple table", + "table path": "main.sbCustomer", + "unique properties": ["_id", "name", "email", "address1"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "sbCustId", + "data type": "string", + "description": "The unique identifying string for the customer", + "sample values": ["C001", "C005", "C014"], + "synonyms": ["customer key"] + }, + { + "name": "name", + "type": "table column", + "column name": "sbCustName", + "data type": "string", + "description": "The name of the customer", + "sample values": ["Ava Wilson", "Bob Johnson", "David Kim", "Emily Davis"], + "synonyms": ["customer name", "first and last name"] + }, + { + "name": "email", + "type": "table column", + "column name": "sbCustEmail", + "data type": "string", + "description": "The email of the customer", + "sample values": ["alex.rodriguez@email.com", "ava.wilson@email.com", "arah.nguyen@email.com", "samantha.lee@email.com"], + "synonyms": ["email address", "customer email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "sbCustPhone", + "data type": "string", + "description": "The phone number of the customer", + "sample values": ["555-123-4567", "555-623-7419", "555-135-7902"], + "synonyms": ["phone number", "customer phone"] + }, + { + "name": "address1", + "type": "table column", + "column name": "sbCustAddress1", + "data type": "string", + "description": "The address of the customer", + "sample 
values": ["123 Main St", "951 Pine Rd", "246 Elm St", "258 Elm Ave"], + "synonyms": ["first address line", "street address"] + }, + { + "name": "address2", + "type": "table column", + "column name": "sbCustAddress2", + "data type": "string", + "description": "The second address of the customer, if one exists", + "synonyms": ["second address line"] + }, + { + "name": "city", + "type": "table column", + "column name": "sbCustCity", + "data type": "string", + "description": "The city the customer lives in", + "sample values": ["Anytown", "Yourtown", "Someville", "Mytown"], + "synonyms": ["customer address city"] + }, + { + "name": "state", + "type": "table column", + "column name": "sbCustState", + "data type": "string", + "description": "The state the customer lives in, by its capitalized two-letter abbreviation", + "sample values": ["CA", "NY", "TX", "FL", "NJ"], + "synonyms": ["customer address state"] + }, + { + "name": "country", + "type": "table column", + "column name": "sbCustCountry", + "data type": "string", + "description": "The country the customer lives in", + "sample values": ["USA"], + "synonyms": ["customer address country"] + }, + { + "name": "postal_code", + "type": "table column", + "column name": "sbCustPostalCode", + "data type": "string", + "description": "The postal/zip code of the customer's address", + "sample values": ["90001", "10002", "08801"], + "synonyms": ["zip code", "customer address postal code"] + }, + { + "name": "join_date", + "type": "table column", + "column name": "sbCustJoinDate", + "data type": "datetime", + "description": "The date the customer joined the brokerage", + "synonyms": ["signup date", "customer join date", "account creation date"] + }, + { + "name": "status", + "type": "table column", + "column name": "sbCustStatus", + "data type": "string", + "description": "The state of the customer's account, which is either 'active', 'inactive', or 'suspended'", + "sample values": ["active", "inactive", "suspended"], + 
"synonyms": ["customer account status", "activity state"] + } + ], + "description": "The customers who have accounts with the the brokerage", + "synonyms": ["users", "clients", "members", "subscribers"] + }, + { + "name": "tickers", + "type": "simple table", + "table path": "main.sbTicker", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "sbTickerId", + "data type": "string", + "description": "The unique identifying string for each ticker", + "sample values": ["T001", "T003", "T007", "T0018"], + "synonyms": ["ticker key"] + }, + { + "name": "symbol", + "type": "table column", + "column name": "sbTickerSymbol", + "data type": "string", + "description": "The symbol used to abbreviate the ticker name in a recognizable manner, e.g. APPL for Apple, or GOOG for Google", + "sample values": ["APPL", "AMZN", "BRK.B", "FB", "GOOG", "NFLX", "MSFT"], + "synonyms": ["ticker symbol", "ticker abbreviation"] + }, + { + "name": "name", + "type": "table column", + "column name": "sbTickerName", + "data type": "string", + "description": "The full name of the ticker, e.g. 
the company name", + "sample values": ["Berkshire Hathaway Inc.", "Alphabet Inc.", "Apple Inc.", "Microsoft Corporation"], + "synonyms": ["company name", "business name"] + }, + { + "name": "ticker_type", + "type": "table column", + "column name": "sbTickerType", + "data type": "string", + "description": "The category of ticker, which is either 'stock', 'etf', or 'mutual fund'", + "sample values": ["stock", "etf", "mutual fund"], + "synonyms": ["category", "classification"] + }, + { + "name": "exchange", + "type": "table column", + "column name": "sbTickerExchange", + "data type": "string", + "description": "The exchange that the ticker is listed on, which is either 'NASDAQ', 'NYSE', 'NYSE Arca', or 'Vanguard'", + "sample values": ["NASDAQ", "NYSE", "NYSE Arca", "Vanguard"], + "synonyms": [] + }, + { + "name": "currency", + "type": "table column", + "column name": "sbTickerCurrency", + "data type": "string", + "description": "The currency that the ticker is traded in, which is always 'USD'", + "sample values": ["USD"] + }, + { + "name": "db2x", + "type": "table column", + "column name": "sbTickerDb2x", + "data type": "string", + "description": "The two-letter exchange code corresponding to the ticker's exchange, which is either 'NQ' for NASDAQ, 'NY' for NYSE, 'NX' for 'NYSE Arca', or 'VA' for Vanguard", + "sample values": ["NQ", "NY", "NX", "VG"], + "synonyms": ["exchange code"] + }, + { + "name": "is_active", + "type": "table column", + "column name": "sbTickerIsActive", + "data type": "bool", + "description": "Whether the ticker is still active", + "synonyms": ["ticker in use"] + } + ], + "description": "All of the tickers that the brokerage trades", + "synonyms": ["stocks", "etfs", "mutual funds"] + }, + { + "name": "daily_prices", + "type": "simple table", + "table path": "main.sbDailyPrice", + "unique properties": [["ticker_id", "date"]], + "properties": [ + { + "name": "ticker_id", + "type": "table column", + "column name": "sbDpTickerId", + "data type": 
"string", + "description": "The ticker id from the ticker whose price is being record", + "sample values": ["T002", "T003", "T005", "T007", "T011"], + "synonyms": ["ticker key"] + }, + { + "name": "date", + "type": "table column", + "column name": "sbDpDate", + "data type": "datetime", + "description": "The date of the price record", + "synonyms": ["record datetime", "price update date"] + }, + { + "name": "_open", + "type": "table column", + "column name": "sbDpOpen", + "data type": "numeric", + "description": "The opening price of the ticker on the day of the record", + "sample values": [150, 3200, 2500, 280], + "synonyms": ["opening price"] + }, + { + "name": "high", + "type": "table column", + "column name": "sbDpHigh", + "data type": "numeric", + "description": "The highest price of the ticker during the day of the record", + "sample values": [152.5, 282.75, 3225, 185, 2525], + "synonyms": ["high price", "high-point", "maximum price during day"] + }, + { + "name": "low", + "type": "table column", + "column name": "sbDpLow", + "data type": "numeric", + "description": "The lowest price of the ticker during the day of the record", + "sample values": [148.75, 279.5, 3180, 178.5, 2475], + "synonyms": ["low price", "low-point", "minimum price during day"] + }, + { + "name": "close", + "type": "table column", + "column name": "sbDpClose", + "data type": "numeric", + "description": "The close price of the ticker on the day of the record", + "sample values": [151.25, 281, 3210, 184.25, 2510], + "synonyms": ["closing price"] + }, + { + "name": "volume", + "type": "table column", + "column name": "sbDpVolume", + "data type": "numeric", + "description": "The total number of shares of the ticker traded during the day", + "sample values": [75000000, 4000000, 1500000, 35000000], + "synonyms": ["daily trading volume"] + }, + { + "name": "epoch_ms", + "type": "table column", + "column name": "sbDpEpochMs", + "data type": "numeric", + "description": "The number of milliseconds 
since the epoch (January 1, 1970) that the record was created", + "sample values": [1680336000000, 1680336000000, 1680336000000], + "synonyms": ["created timestamp in epoch milliseconds"] + }, + { + "name": "source", + "type": "table column", + "column name": "sbDpSource", + "data type": "string", + "description": "The exchange where the price record originated from, which is either 'NASDAQ', 'NYSE' or 'Vanguard'", + "sample values": ["NYSE", "NASDAQ", "Vanguard"], + "synonyms": ["price record origin"] + } + ], + "description": "The daily updates on tickers including their price information within a single day", + "synonyms": ["historical stock prices", "daily ticker price data"] + }, + { + "name": "transactions", + "type": "simple table", + "table path": "main.sbTransaction", + "unique properties": ["transaction_id", "kpx"], + "properties": [ + { + "name": "transaction_id", + "type": "table column", + "column name": "sbTxId", + "data type": "string", + "description": "The unique identifying string for each transaction", + "sample values": ["TX035", "TX001", "TX008", "TX033"], + "synonyms": ["transaction key"] + }, + { + "name": "customer_id", + "type": "table column", + "column name": "sbTxCustId", + "data type": "string", + "description": "The id of the customer who made the transaction", + "sample values": ["C003", "C005", "C007", "C001", "C008"], + "synonyms": ["customer key"] + }, + { + "name": "ticker_id", + "type": "table column", + "column name": "sbTxTickerId", + "data type": "string", + "description": "The id of the ticker being bought/sold in the transaction", + "sample values": ["T001", "T007", "T008"], + "synonyms": ["ticker key"] + }, + { + "name": "date_time", + "type": "table column", + "column name": "sbTxDateTime", + "data type": "datetime", + "description": "The timestamp that the transaction was made at", + "synonyms": ["transaction timestamp", "buy/sell datetime"] + }, + { + "name": "transaction_type", + "type": "table column", + "column name": 
"sbTxType", + "data type": "string", + "description": "The type of transaction, which is either 'buy' or 'sell'", + "sample values": ["buy", "sell"], + "synonyms": ["category", "classification"] + }, + { + "name": "shares", + "type": "table column", + "column name": "sbTxShares", + "data type": "numeric", + "description": "The number of shares being bought/sold in the transaction", + "sample values": [150, 120, 110], + "synonyms": ["volume traded", "transaction size"] + }, + { + "name": "price", + "type": "table column", + "column name": "sbTxPrice", + "data type": "numeric", + "description": "The price that the ticker was bought/sold at in the transaction", + "sample values": [180, 200, 220, 150, 130], + "synonyms": ["buy price", "sell price", "cost", "per-share value"] + }, + { + "name": "amount", + "type": "table column", + "column name": "sbTxAmount", + "data type": "numeric", + "description": "The total monetary value of the transaction (cost for buy, revenue for sell), which is the price multiplied by the number of shares", + "sample values": [27000, 24000, 24200, 15000, 13000], + "synonyms": ["total cost", "total revenue", "transaction value"] + }, + { + "name": "currency", + "type": "table column", + "column name": "sbTxCcy", + "data type": "string", + "description": "The currency that the transaction was made in, which is always 'USD'", + "sample values": ["USD"] + }, + { + "name": "tax", + "type": "table column", + "column name": "sbTxTax", + "data type": "numeric", + "description": "The tax applied by the brokerage onto the transaction", + "sample values": [135, 120, 121, 75, 65], + "synonyms": ["levy", "duty", "tariff"] + }, + { + "name": "commission", + "type": "table column", + "column name": "sbTxCommission", + "data type": "numeric", + "description": "The commission taken by the brokerage for the transaction", + "sample values": [20, 15, 10], + "synonyms": ["brokerage fee", "transaction fee"] + }, + { + "name": "kpx", + "type": "table column", + 
"column name": "sbTxKpx", + "data type": "string", + "description": "The internal code used for the transaction, which corresponds to the transaction id", + "sample values": ["KP056", "KP053", "KP011"], + "synonyms": [] + }, + { + "name": "settlement_date_str", + "type": "table column", + "column name": "sbTxSettlementDateStr", + "data type": "string", + "description": "The date the transaction was settled, which is a string in the form YYYYMMDD HH:MM:SS, or null if not settled yet", + "sample values": ["20230401 09:30:00", "20230401 10:15:00", "20230401 11:00:00"], + "synonyms": ["settlement timestamp", "datetime of transaction finalization"] + }, + { + "name": "status", + "type": "table column", + "column name": "sbTxStatus", + "data type": "string", + "description": "The status of the transaction, which is either 'success', 'fail', or 'pending'", + "sample values": ["success", "fail", "pending"], + "synonyms": ["transaction state"] + } + ], + "description": "All transactions made through the brokerage", + "synonyms": ["exchanges", "trades", "buy/sell orders"] + } + ], + "relationships": [ + { + "name": "transactions_made", + "type": "simple join", + "parent collection": "customers", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "customer_id" + ] + }, + "description": "All of the transactions made by the customer, including both buys and sells", + "synonyms": ["exchanges made", "trades made", "stock purchases and sales"] + }, + { + "name": "customer", + "type": "reverse", + "original parent": "customers", + "original property": "transactions_made", + "singular": true, + "always matches": true, + "description": "The customer who made the transaction", + "synonyms": ["user", "client", "stock buyer/seller"] + }, + { + "name": "transactions_of", + "type": "simple join", + "parent collection": "tickers", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + 
"_id": [ + "ticker_id" + ] + }, + "description": "The transactions made of a ticker, including both buys and sells of the ticker", + "synonyms": ["purchases", "sales", "exchanges", "trades of ticker"] + }, + { + "name": "ticker", + "type": "reverse", + "original parent": "tickers", + "original property": "transactions_of", + "singular": true, + "always matches": true, + "description": "The ticker that the transaction was made of", + "synonyms": ["stock", "etf", "fund", "company"] + }, + { + "name": "ticker", + "type": "simple join", + "parent collection": "daily_prices", + "child collection": "tickers", + "singular": true, + "always matches": true, + "keys": { + "ticker_id": [ + "_id" + ] + }, + "description": "The ticker that the daily price record corresponds to", + "synonyms": ["stock", "etf", "fund", "company"] + }, + { + "name": "daily_prices", + "type": "reverse", + "original parent": "daily_prices", + "original property": "ticker", + "singular": false, + "always matches": false, + "description": "The daily price records for the ticker", + "synonyms": ["historical prices", "price updates"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "Dealership", + "version": "V2", + "collections": [ + { + "name": "cars", + "type": "simple table", + "table path": "main.cars", + "unique properties": ["_id", "vin_number"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each car", + "sample values": [1, 3, 5, 6, 18], + "synonyms": ["car key", "car id"] + }, + { + "name": "make", + "type": "table column", + "column name": "make", + "data type": "string", + "description": "The make of the car, e.g. 
Ford, Toyota, etc.", + "sample values": ["Toyota", "Honda", "Ford", "Tesla", "Nissan"], + "synonyms": ["car brand", "car manufacturer"] + }, + { + "name": "model", + "type": "table column", + "column name": "model", + "data type": "string", + "description": "The model name of the car, e.g. Civic, Model 3, A4, RX350, Equinox, etc.", + "sample values": ["Camry", "Accord", "Mustang", "Civic", "Altima", "Tiguan"], + "synonyms": ["car model", "car type"] + }, + { + "name": "year", + "type": "table column", + "column name": "year", + "data type": "numeric", + "description": "Year the car was manufactured", + "sample values": [2021, 2022, 2023], + "synonyms": ["year of production", "manufacturing year"] + }, + { + "name": "color", + "type": "table column", + "column name": "color", + "data type": "string", + "description": "Color of the car", + "sample values": ["Silver", "platinum/grey", "blue", "Titan Silver", "black", "ebony black", "baby blue", "Black", "Jade", "Fiery red"], + "synonyms": ["paint color"] + }, + { + "name": "vin_number", + "type": "table column", + "column name": "vin_number", + "data type": "string", + "description": "Vehicle identification number, a unique code used to identify individual motor vehicles", + "sample values": ["1C4HJXDG3NW123456", "1C6SRFFT3NN123456", "3VV2B7AX1NM123456"], + "synonyms": ["VIN", "vehicle ID"] + }, + { + "name": "engine_type", + "type": "table column", + "column name": "engine_type", + "data type": "string", + "description": "Type of engine (e.g., V6, V8, Electric)", + "sample values": [ + "Boxer 4", + "Electric", + "Flat 6", + "Inline 4", + "Inline 6", + "V6", + "V8" + ], + "synonyms": ["motor", "engine model"] + }, + { + "name": "transmission", + "type": "table column", + "column name": "transmission", + "data type": "string", + "description": "Type of transmission, which is always either 'Automatic', 'Manual', or 'CVT'", + "sample values": ["Automatic", "Manual", "CVT"], + "synonyms": ["gearbox", "transmission type"] 
+ }, + { + "name": "cost", + "type": "table column", + "column name": "cost", + "data type": "numeric", + "description": "Cost of the car", + "sample values": [38000.0, 45000.0, 22000.0, 32000.0], + "synonyms": ["value"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the car was added to the system", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "All cars recorded in the dealership system", + "synonyms": ["vehicles", "automobiles"] + }, + { + "name": "salespeople", + "type": "simple table", + "table path": "main.salespersons", + "unique properties": ["_id", "email", "phone", ["first_name", "last_name"]], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each salesperson in the dealership", + "sample values": [1, 5, 10, 13], + "synonyms": ["salesperson key", "salesperson id", "salesperson number"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the salesperson", + "sample values": ["John", "Jane", "David", "Sarah"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the salesperson", + "sample values": ["Doe", "Smith", "Taylor", "Thomas"], + "synonyms": ["surname"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "Work email of the salesperson", + "sample values": ["john.doe@autonation.com", "michael.johnson@autonation.com", "jennifer.davis@directauto.com"], + "synonyms": ["email address", "work email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "Contact phone number of the salesperson, 
where the first 3 digits are the area code. Format: (123)-456-7890", + "sample values": ["(555)-123-4567", "(444)-333-4444", "(001)-415-5678", "(555)-789-0123"], + "synonyms": ["phone number", "contact information"] + }, + { + "name": "hire_date", + "type": "table column", + "column name": "hire_date", + "data type": "datetime", + "description": "The date the salesperson was hired", + "synonyms": ["employment date", "start date"] + }, + { + "name": "termination_date", + "type": "table column", + "column name": "termination_date", + "data type": "datetime", + "description": "The date the salesperson left the dealership, if applicable (null if still working there)", + "synonyms": ["firing date", "exit date", "quitting date", "end date"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the salesperson record was created", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "The salespeople working at the dealership, current and former", + "synonyms": ["dealership employees", "sales staff", "sales associates"] + }, + { + "name": "customers", + "type": "simple table", + "table path": "main.customers", + "unique properties": ["_id", "email", "phone", "address", ["first_name", "last_name"]], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each customer", + "sample values": [1, 5, 9, 12], + "synonyms": ["customer key", "customer id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the customer", + "sample values": ["William", "Ava", "Sophia", "Mia"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the customer", + "sample values": ["Davis", 
"Miller", "Jackson", "Lee"], + "synonyms": ["surname"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email address of the customer", + "sample values": ["william.davis@example.com", "charlotte.anderson@example.com", "sophia.lee@microsoft.com", "christopher.brown@ai.com", "abigail.harris@company.io"], + "synonyms": ["contact email", "email address"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "The phone number of the customer listed with the dealership", + "sample values": ["555-888-9999", "555-666-5555", "555-456-7890", "555-444-3333"], + "synonyms": ["contact information", "phone number"] + }, + { + "name": "address", + "type": "table column", + "column name": "address", + "data type": "string", + "description": "The home address of the customer given to the dealership", + "sample values": ["123 Main St", "456 Oak Ave", "987 Birch Dr", "951 Oak Ln"], + "synonyms": ["street address", "home address", "street name and street number"] + }, + { + "name": "city", + "type": "table column", + "column name": "city", + "data type": "string", + "description": "The city of the customer's home address", + "sample values": ["New York", "Los Angeles", "Chicago", "Dallas"] + }, + { + "name": "state", + "type": "table column", + "column name": "state", + "data type": "string", + "description": "The state of the customer's home address, by its capitalized two-letter abbreviation", + "sample values": ["NY", "CA", "TX", "IL"] + }, + { + "name": "zip_code", + "type": "table column", + "column name": "zip_code", + "data type": "string", + "description": "The zip-code of the customer's home address", + "sample values": ["10001", "60007", "95101", "94101"], + "synonyms": [] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the customer 
record was created", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "All the customers who are registered with the dealership", + "synonyms": ["clients", "users", "buyers", "consumers"] + }, + { + "name": "payments_made", + "type": "simple table", + "table path": "main.payments_made", + "unique properties": ["_id", "invoice_number"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each payment made", + "sample values": [1, 11, 16, 17], + "synonyms": ["payment key", "payment id"] + }, + { + "name": "vendor_name", + "type": "table column", + "column name": "vendor_name", + "data type": "string", + "description": "Name of the vendor to whom the payment was made", + "sample values": ["Chevrolet Auto", "Marketing Agency", "Car Manufacturer Inc"], + "synonyms": ["payment recipient", "vendor"] + }, + { + "name": "payment_date", + "type": "table column", + "column name": "payment_date", + "data type": "datetime", + "description": "Date that the payment was made" + }, + { + "name": "payment_amount", + "type": "table column", + "column name": "payment_amount", + "data type": "numeric", + "description": "The amount that was paid in the payment in USD", + "sample values": [150000.0, 1500.0, 22000.0], + "synonyms": ["payment sum", "dollar value"] + }, + { + "name": "payment_method", + "type": "table column", + "column name": "payment_method", + "data type": "string", + "description": "Method of payment, which is either 'bank_transfer', 'credit_card', or 'check'", + "sample values": ["bank_transfer", "credit_card", "check", "credit_card"], + "synonyms": ["transfer medium", "payment type"] + }, + { + "name": "invoice_number", + "type": "table column", + "column name": "invoice_number", + "data type": "string", + "description": "Invoice number associated with the payment", + "sample values": ["INV-001", "INV-017", "INV-008"], 
+ "synonyms": ["invoice id", "invoice key", "invoice serial"] + }, + { + "name": "invoice_date", + "type": "table column", + "column name": "invoice_date", + "data type": "datetime", + "description": "Date of the invoice", + "synonyms": ["date of payment"] + }, + { + "name": "due_date", + "type": "table column", + "column name": "due_date", + "data type": "datetime", + "description": "Due date of the invoice", + "synonyms": ["deadline", "expected payment date"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the payment made record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of payments made by the dealership to vendors", + "synonyms": ["vendor payments", "vendor transactions", "vendor expenditures"] + }, + { + "name": "payments_received", + "type": "simple table", + "table path": "main.payments_received", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each payment received record", + "sample values": [1, 2, 10, 19, 23], + "synonyms": ["payment key", "payment id"] + }, + { + "name": "sale_id", + "type": "table column", + "column name": "sale_id", + "data type": "numeric", + "description": "The id of the sale that the payment is associated with", + "sample values": [1, 5, 9, 20, 21], + "synonyms": ["sale key", "sale id"] + }, + { + "name": "payment_date", + "type": "table column", + "column name": "payment_date", + "data type": "datetime", + "description": "Date when the payment was received. Can take place after the sale date, or in installments.", + "synonyms": ["transaction date", "installment date"] + }, + { + "name": "payment_amount", + "type": "table column", + "column name": "payment_amount", + "data type": "numeric", + "description": "Amount of the payment received. 
Can be less than the sale price if the payment is made in installments.", + "sample values": [115000.0, 5000.0, 44000.0, 26000.0], + "synonyms": ["cash received", "payment sum"] + }, + { + "name": "payment_method", + "type": "table column", + "column name": "payment_method", + "data type": "string", + "description": "Method used to make the payment, which can be 'check', 'financing', 'credit_card', 'debit_card', or 'cash'", + "sample values": ["check", "financing", "credit_card", "debit_card", "cash"], + "synonyms": ["transfer method", "payment type"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the payment received record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of payments received by the dealership from customers as part of a sale", + "synonyms": ["car purchase payments", "customer payments", "customer transactions", "car purchase installments"] + }, + { + "name": "sales", + "type": "simple table", + "table path": "main.sales", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each car sale", + "sample values": [5, 11, 12, 18, 22], + "synonyms": ["sale key", "sale id"] + }, + { + "name": "car_id", + "type": "table column", + "column name": "car_id", + "data type": "numeric", + "description": "The id of the car being purchased in the sale", + "sample values": [1, 3, 5, 6, 18], + "synonyms": ["car key", "car id"] + }, + { + "name": "salesperson_id", + "type": "table column", + "column name": "salesperson_id", + "data type": "numeric", + "description": "The id of the salesperson who brokered the sale", + "sample values": [1, 5, 10, 13], + "synonyms": ["salesperson key", "salesperson id"] + }, + { + "name": "customer_id", + "type": "table column", + "column name": 
"customer_id", + "data type": "numeric", + "description": "The id of the customer who purchased the car in the sale", + "sample values": [1, 5, 9, 12], + "synonyms": ["customer key", "customer id"] + }, + { + "name": "sale_price", + "type": "table column", + "column name": "sale_price", + "data type": "numeric", + "description": "Price at which the car was sold", + "sample values": [43500.0, 30500.0, 38000.0, 49000.0, 26500.0], + "synonyms": ["purchase cost", "sale amount", "dollar value"] + }, + { + "name": "sale_date", + "type": "table column", + "column name": "sale_date", + "data type": "datetime", + "description": "Date when the car was sold", + "synonyms": ["purchase date", "transaction date"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the sale record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of cars being purchased from the dealership by a customer via one of the salespeople", + "synonyms": ["car purchases", "sale records", "vehicle sale transactions"] + }, + { + "name": "inventory_snapshots", + "type": "simple table", + "table path": "main.inventory_snapshots", + "unique properties": ["_id", ["car_id", "snapshot_date"]], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "Unique identifier for each inventory snapshot entry", + "sample values": [1, 4, 13, 19, 23], + "synonyms": ["inventory snapshot key", "snapshot id"] + }, + { + "name": "snapshot_date", + "type": "table column", + "column name": "snapshot_date", + "data type": "datetime", + "description": "Date of the inventory snapshot", + "synonyms": ["date of snapshot", "inventory date"] + }, + { + "name": "car_id", + "type": "table column", + "column name": "car_id", + "data type": "numeric", + "description": "The key of the car that the inventory snapshot is 
for", + "sample values": [2, 10, 1, 4, 9], + "synonyms": ["car key", "car id"] + }, + { + "name": "is_in_inventory", + "type": "table column", + "column name": "is_in_inventory", + "data type": "bool", + "description": "Whether that specific car was in the dealership's inventory at the time of the snapshot", + "synonyms": ["car in inventory", "car availability"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the inventory snapshot record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of whether specific individual cars were in the dealership's inventory as of specific dates", + "synonyms": ["inventory records", "stock records", "car availability manifest"] + } + ], + "relationships": [ + { + "name": "sale_records", + "type": "simple join", + "parent collection": "cars", + "child collection": "sales", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "car_id" + ] + }, + "description": "All sales made where the car was sold", + "synonyms": ["purchases of vehicle", "car sales", "vehicle transactions"] + }, + { + "name": "car", + "type": "reverse", + "original parent": "cars", + "original property": "sale_records", + "singular": true, + "always matches": true, + "description": "The car being purchased in the sale", + "synonyms": ["vehicle", "automobile"] + }, + { + "name": "inventory_snapshots", + "type": "simple join", + "parent collection": "cars", + "child collection": "inventory_snapshots", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "car_id" + ] + }, + "description": "The inventory snapshots for the car, which show whether it was in the dealership's inventory at specific dates", + "synonyms": ["availability records", "inventory history"] + }, + { + "name": "car", + "type": "reverse", + "original parent": "cars", + "original property": "inventory_snapshots", + 
"singular": true, + "always matches": true, + "description": "The car that the inventory snapshot belongs to", + "synonyms": ["vehicle", "automobile"] + }, + { + "name": "sales_made", + "type": "simple join", + "parent collection": "salespeople", + "child collection": "sales", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "salesperson_id" + ] + }, + "description": "All the sales brokered by the salesperson", + "synonyms": ["sales records", "transactions made", "deals closed"] + }, + { + "name": "salesperson", + "type": "reverse", + "original parent": "salespeople", + "original property": "sales_made", + "singular": true, + "always matches": true, + "description": "The salesperson who brokered the car purchase deal", + "synonyms": ["broker", "sales associate", "sales staff", "dealer"] + }, + { + "name": "car_purchases", + "type": "simple join", + "parent collection": "customers", + "child collection": "sales", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "customer_id" + ] + }, + "description": "All car purchases made by the customer", + "synonyms": ["vehicle purchase records", "car transactions", "automobile purchases"] + }, + { + "name": "customer", + "type": "reverse", + "original parent": "customers", + "original property": "car_purchases", + "singular": true, + "always matches": true, + "description": "The customer who purchased the car", + "synonyms": ["client", "buyer", "purchaser", "consumer", "user"] + }, + { + "name": "payment", + "type": "simple join", + "parent collection": "sales", + "child collection": "payments_received", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "sale_id" + ] + }, + "description": "The payment installments made for the sale, which can be one or multiple installments for a single sale", + "synonyms": ["sale purchase installments", "payment records"] + }, + { + "name": "sale_record", + "type": "reverse", + "original parent": "sales", + "original 
property": "payment", + "singular": true, + "always matches": true, + "description": "The sale that the payment is a part of", + "synonyms": ["purchase record", "vehicle sale", "car purchase"] + } + ], + "functions": [ + { + "name": "ADD_MONTHS", + "type": "sql macro", + "macro text": "DATE_ADD('month', {0}, {1})", + "description": "Adds a number of months to a date value in trino", + "input signature": {"type": "fixed arguments", "value": ["datetime", "numeric"]}, + "output signature": {"type": "constant", "value": "datetime"} + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "DermTreatment", + "version": "V2", + "collections": [ + { + "name": "doctors", + "type": "simple table", + "table path": "main.doctors", + "unique properties": ["doc_id", "first_name", "last_name", "board_certification_number"], + "properties": [ + { + "name": "doc_id", + "type": "table column", + "column name": "doc_id", + "data type": "numeric", + "description": "Unique identifier for each doctor in the system", + "sample values": [1, 2, 4, 8, 10], + "synonyms": ["doctor key", "doctor id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the doctor", + "sample values": ["John", "Michael", "Daniel", "Olivia"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the doctor", + "sample values": ["Smith", "Brown", "Wilson", "Garcia"], + "synonyms": ["surname"] + }, + { + "name": "specialty", + "type": "table column", + "column name": "specialty", + "data type": "string", + "description": "The specialty of the doctor, which can be one of the following: dermatology, immunology, general, or oncology", + "sample values": ["dermatology", "immunology", "general", "oncology"], + "synonyms": ["focus", "area", "specialization", 
"expertise", "type of doctor"] + }, + { + "name": "year_reg", + "type": "table column", + "column name": "year_reg", + "data type": "numeric", + "description": "Year the doctor was registered and obtained license", + "sample values": [1998, 2006, 2012, 2015, 2018], + "synonyms": ["registration year", "year received license"] + }, + { + "name": "med_school_name", + "type": "table column", + "column name": "med_school_name", + "data type": "string", + "description": "The name of the medical school where the doctor graduated", + "sample values": ["University of Pennsylvania", "Johns Hopkins University", "Columbia University"], + "synonyms": ["alma mater", "medical school", "graduated from"] + }, + { + "name": "city", + "type": "table column", + "column name": "loc_city", + "data type": "string", + "description": "The city where the doctor is located", + "sample values": ["San Francisco", "Durham", "Ann Arbour", "New York"], + "synonyms": ["city of practice", "city of work", "location city", "city of residence"] + }, + { + "name": "state", + "type": "table column", + "column name": "loc_state", + "data type": "string", + "description": "The state where the doctor is located, by its capitalized two-letter abbreviation", + "sample values": ["CA", "NC", "NY", "MI"], + "synonyms": ["state of practice", "state of work", "location state", "state of residence"] + }, + { + "name": "zip_code", + "type": "table column", + "column name": "loc_zip", + "data type": "string", + "description": "The zip code where the doctor is located", + "sample values": ["21201", "02115", "94304", "10027"], + "synonyms": ["zip code of practice", "zip code of work", "location zip code", "zip code of residence"] + }, + { + "name": "board_certification_number", + "type": "table column", + "column name": "bd_cert_num", + "data type": "string", + "description": "The board certification number of the doctor", + "sample values": ["YZA890", "ABC123", "GHI012", "JKL345"], + "synonyms": ["board 
certification number", "certification number"] + } + ], + "description": "All the doctors registered in the system", + "synonyms": ["practitioners", "physicians", "medical professionals"] + }, + { + "name": "patients", + "type": "simple table", + "table path": "main.patients", + "unique properties": ["patient_id", "email", "phone", "first_name"], + "properties": [ + { + "name": "patient_id", + "type": "table column", + "column name": "patient_id", + "data type": "numeric", + "description": "The unique identifier for each patient in the system", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["patient key", "patient id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the patient", + "sample values": ["Alice", "Bob", "Carol", "David", "Eve"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the patient", + "sample values": ["Smith", "Richter", "Martinez", "Taylor"], + "synonyms": ["surname"] + }, + { + "name": "date_of_birth", + "type": "table column", + "column name": "date_of_birth", + "data type": "datetime", + "description": "The date the patient was born", + "synonyms": ["birthday"] + }, + { + "name": "date_of_registration", + "type": "table column", + "column name": "date_of_registration", + "data type": "datetime", + "description": "The date the patient was registered in the system", + "synonyms": ["registration date", "system entry date"] + }, + { + "name": "gender", + "type": "table column", + "column name": "gender", + "data type": "string", + "description": "The gender of the patient, which can be either 'Male', 'Female', or 'Others'", + "sample values": ["Male", "Female", "Others"], + "synonyms": ["sex"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email of the patient used 
for contact information", + "sample values": ["alice@email.com", "eve@email.com", "frank@email.com", "john@qwik.com"], + "synonyms": ["email address", "contact email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "The phone number of the patient used for contact information", + "sample values": ["555-123-4567", "555-987-6543", "555-123-4567"], + "synonyms": ["contact phone", "phone number"] + }, + { + "name": "street_address", + "type": "table column", + "column name": "addr_street", + "data type": "string", + "description": "The street address of the patient's home, including street name and number", + "sample values": ["987 Birch Dr", "753 Walnut Ave", "951 Spruce Blvd"], + "synonyms": ["street name and number", "home address"] + }, + { + "name": "city", + "type": "table column", + "column name": "addr_city", + "data type": "string", + "description": "The city of the patient's address", + "sample values": ["Anytown", "Somecity", "Somewhere", "Somewhere"], + "synonyms": ["home city", "town", "city of residence"] + }, + { + "name": "state", + "type": "table column", + "column name": "addr_state", + "data type": "string", + "description": "The state of the patient's address, by its capitalized two-letter abbreviation", + "sample values": ["CA", "TX", "FL", "OH"], + "synonyms": ["home state", "state of residence"] + }, + { + "name": "zip_code", + "type": "table column", + "column name": "addr_zip", + "data type": "string", + "description": "The zip code of the patient's address", + "sample values": ["12345", "54321", "86420"], + "synonyms": ["home zip code", "postal code", "zip code of residence"] + }, + { + "name": "insurance_type", + "type": "table column", + "column name": "ins_type", + "data type": "string", + "description": "The type of insurance the patient has, which can be one of the following: private, medicare, medicaid, or uninsured", + "sample values": ["private", "medicare", 
"medicaid", "uninsured"], + "synonyms": [] + }, + { + "name": "insurance_policy_number", + "type": "table column", + "column name": "ins_policy_num", + "data type": "string", + "description": "The policy number of the patient's insurance (null if uninsured)", + "sample values": ["ABC123456", "XYZ789012", "JKL567890", "PQR135790"], + "synonyms": ["policy number"] + }, + { + "name": "height", + "type": "table column", + "column name": "height_cm", + "data type": "numeric", + "description": "The height of the patient in centimeters", + "sample values": [165, 180, 160, 178], + "synonyms": ["centimeter height"] + }, + { + "name": "weight", + "type": "table column", + "column name": "weight_kg", + "data type": "numeric", + "description": "The weight of the patient in kilograms", + "sample values": [60, 78, 90, 60], + "synonyms": ["kilogram weight"] + } + ], + "description": "The personal & registration information about every patient registered in the system", + "synonyms": ["patient records", "patient information", "patient data", "clients", "trial subjects", "participants"] + }, + { + "name": "drugs", + "type": "simple table", + "table path": "main.drugs", + "unique properties": ["drug_id", "national_drug_code", "drug_name"], + "properties": [ + { + "name": "drug_id", + "type": "table column", + "column name": "drug_id", + "data type": "numeric", + "description": "The unique identifier for each drug in the system", + "sample values": [1, 2, 4, 8], + "synonyms": ["drug key", "drug id"] + }, + { + "name": "drug_name", + "type": "table column", + "column name": "drug_name", + "data type": "string", + "description": "The brand name of the drug", + "sample values": ["Drugalin", "Topizol", "Biologic-X", "Topicalin"], + "synonyms": ["brand name", "pharmaceutical name", "medication name"] + }, + { + "name": "manufacturer", + "type": "table column", + "column name": "manufacturer", + "data type": "string", + "description": "The name of the company who produces the drug", + 
"sample values": ["Pharma Inc", "Acme Pharma", "Derma Rx", "BioMed Ltd"], + "synonyms": ["producer", "drug manufacturer", "pharmaceutical company", "drug company"] + }, + { + "name": "drug_type", + "type": "table column", + "column name": "drug_type", + "data type": "string", + "description": "The category of drug, which can be one of the following: 'biologic', 'small molecule', or 'topical'", + "sample values": ["biologic", "small molecule", "topical"], + "synonyms": ["category", "classification", "type of drug"] + }, + { + "name": "mechanism_of_activation", + "type": "table column", + "column name": "moa", + "data type": "string", + "description": "The mechanism of activation of the drug", + "sample values": ["TNF-alpha inhibitor", "PDE4 inhibitor", "IL-12/23 inhibitor"], + "synonyms": ["moa"] + }, + { + "name": "fda_approval_date", + "type": "table column", + "column name": "fda_appr_dt", + "data type": "datetime", + "description": "The date the drug was approved by the FDA (null if not approved because it is still under trial)", + "synonyms": ["approval date"] + }, + { + "name": "administration_route", + "type": "table column", + "column name": "admin_route", + "data type": "string", + "description": "The means used to administer the drug, which can be one of the following: 'oral', 'injection' or 'topical'", + "sample values": ["oral", "injection", "topical"], + "synonyms": ["method of procedure", "administration method", "delivery means"] + }, + { + "name": "recommended_dosage_amount", + "type": "table column", + "column name": "dos_amt", + "data type": "numeric", + "description": "The recommended amount of the drug to be taken per dose, where the unit is specified in the dosage_units property", + "sample values": [40, 30, 15, 5], + "synonyms": ["dose size", "quantity per dose", "amount taken per administration"] + }, + { + "name": "dosage_units", + "type": "table column", + "column name": "dos_unit", + "data type": "string", + "description": "The unit used by 
recommended_dosage_amount, which is either `mg` (for milligrams) or `g` (for grams)", + "sample values": ["mg", "g"], + "synonyms": ["units of measurement", "dosage unit", "dose quantity scale factor"] + }, + { + "name": "dose_frequency_hours", + "type": "table column", + "column name": "dos_freq_hrs", + "data type": "numeric", + "description": "The recommended number of hours between dosages", + "sample values": [336, 24, 12, 672], + "synonyms": ["gap between doses", "administration hour intervals", "time between doses"] + }, + { + "name": "national_drug_code", + "type": "table column", + "column name": "ndc", + "data type": "string", + "description": "The national drug code of the drug, which is a unique identifier assigned to each drug by the FDA", + "sample values": ["12345-678-90", "13579-246-80", "95146-753-19"], + "synonyms": [] + } + ], + "description": "The drugs listed in the system, which are used for treatment of patients", + "synonyms": ["medications", "pharmaceuticals"] + }, + { + "name": "diagnoses", + "type": "simple table", + "table path": "main.diagnoses", + "unique properties": ["_id", "code", "name", "description"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "diag_id", + "data type": "numeric", + "description": "The unique identifier for each diagnosis in the system", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["diagnosis key", "diagnosis id"] + }, + { + "name": "code", + "type": "table column", + "column name": "diag_code", + "data type": "string", + "description": "The code of the diagnosis, which is a unique identifier assigned to each diagnosis by the ICD-10 system", + "sample values": ["L40.0", "L40.59", "F40.8", "L40.50"], + "synonyms": ["diagnosis code", "ICD-10 code"] + }, + { + "name": "name", + "type": "table column", + "column name": "diag_name", + "data type": "string", + "description": "Name of the diagnosis, in medical terms", + "sample values": ["Psoriasis vulgaris", "Generalized 
pustular psoriasis", "Psoriasis, unspecified", "Other psoriatic arthropathy", "Psoriatic arthritis mutilans"], + "synonyms": ["title"] + }, + { + "name": "description", + "type": "table column", + "column name": "diag_desc", + "data type": "string", + "description": "An elaboration of the diagnosis, in more layman's terms and/or with some symptoms included", + "sample values": ["Plaque psoriasis, the most common form", "Psoriasis not further specified", "Includes flexural, erythrodermic, and other rare types", "Small, teardrop-shaped lesions", "Widespread pustules on top of red skin"], + "synonyms": ["details", "symptoms", "explanation"] + } + ], + "description": "Records of each diagnosis in the system, which are identified when treating patients", + "synonyms": ["medical conditions", "diagnostic codes"] + }, + { + "name": "treatments", + "type": "simple table", + "table path": "main.treatments", + "unique properties": ["treatment_id"], + "properties": [ + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "Unique identifier for each treatment record in the system", + "sample values": [1, 4, 7, 16, 26], + "synonyms": ["treatment id", "treatment key"] + }, + { + "name": "patient_id", + "type": "table column", + "column name": "patient_id", + "data type": "numeric", + "description": "The id of the patient who is receiving the treatment", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["patient key", "patient id"] + }, + { + "name": "doc_id", + "type": "table column", + "column name": "doc_id", + "data type": "numeric", + "description": "The id of the doctor who is administering the treatment", + "sample values": [1, 2, 4, 8], + "synonyms": ["doctor key", "doctor id"] + }, + { + "name": "drug_id", + "type": "table column", + "column name": "drug_id", + "data type": "numeric", + "description": "The id of the drug being used in the treatment", + "sample values": [1, 2, 3, 5], + 
"synonyms": ["drug key", "drug id"] + }, + { + "name": "diagnosis_id", + "type": "table column", + "column name": "diag_id", + "data type": "numeric", + "description": "The id of the diagnosis the patient is being treated for", + "sample values": [2, 4, 5, 6, 8], + "synonyms": ["diagnosis key", "diagnosis id"] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_dt", + "data type": "datetime", + "description": "The date the treatment began", + "synonyms": ["treatment start date", "treatment initiation date"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_dt", + "data type": "datetime", + "description": "The date the treatment ended (null if ongoing)", + "synonyms": ["treatment end date", "treatment completion date"] + }, + { + "name": "is_placebo", + "type": "table column", + "column name": "is_placebo", + "data type": "bool", + "description": "True if the treatment is a placebo, false otherwise", + "synonyms": ["placebo treatment", "dummy treatment"] + }, + { + "name": "total_drug_amount", + "type": "table column", + "column name": "tot_drug_amt", + "data type": "numeric", + "description": "The total quantity of the drug used in the treatment, where the unit is specified in the drug_unit property", + "sample values": [240, 180, 720, 360], + "synonyms": ["amount of drug used", "total drug quantity", "total dosage"] + }, + { + "name": "drug_unit", + "type": "table column", + "column name": "drug_unit", + "data type": "string", + "description": "The unit used by total_drug_amount, which is either `mg` (for milligrams) or `g` (for grams)", + "sample values": ["mg", "g"], + "synonyms": ["dosage unit", "unit of measurement", "drug quantity scale factor"] + } + ], + "description": "The records of each treatment administered to patients in the system, which can be either a drug or a placebo", + "synonyms": ["treatment records", "medical treatments", "therapies"] + }, + { + "name": "outcomes", + "type": "simple 
table", + "table path": "main.outcomes", + "unique properties": ["outcome_id", ["treatment_id", "assessment_date"]], + "properties": [ + { + "name": "outcome_id", + "type": "table column", + "column name": "outcome_id", + "data type": "numeric", + "description": "The unique identifier for each treatment outcome in the system", + "sample values": [1, 5, 16, 20, 21], + "synonyms": ["outcome key", "outcome id"] + }, + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id for the treatment that the outcome is related to", + "sample values": [3, 5, 9, 25, 26], + "synonyms": ["treatment key", "treatment id"] + }, + { + "name": "assessment_date", + "type": "table column", + "column name": "assess_dt", + "data type": "datetime", + "description": "The date when the outcome assessment was made", + "synonyms": ["date of assessment", "evaluation date"] + }, + { + "name": "day7_lesion_count", + "type": "table column", + "column name": "day7_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 7 after treatment started", + "sample values": [20, 25, 18, 30, 22], + "synonyms": ["lesions observed on day 7"] + }, + { + "name": "day30_lesion_count", + "type": "table column", + "column name": "day30_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 30 after treatment started (null if not enough time has passed)", + "sample values": [15, 18, 12, 25, 16], + "synonyms": ["lesions observed on day 30"] + }, + { + "name": "day100_lesion_count", + "type": "table column", + "column name": "day100_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 100 after treatment started (null if not enough time has passed)", + "sample values": [5, 8, 3, 12, 6], + "synonyms": ["lesions observed on day 100"] + }, + { + "name": 
"day7_pasi_score", + "type": "table column", + "column name": "day7_pasi_score", + "data type": "numeric", + "description": "The PASI score calculated at day 7 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72", + "sample values": [12.5, 15.0, 10.8, 18.2, 13.1], + "synonyms": ["PASI on day 7"] + }, + { + "name": "day30_pasi_score", + "type": "table column", + "column name": "day30_pasi_score", + "data type": "numeric", + "description": "The PASI score calculated at day 30 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72 (null if not enough time has passed)", + "sample values": [8.2, 10.1, 6.4, 18.2, 13.1], + "synonyms": ["PASI on day 30"] + }, + { + "name": "day100_pasi_score", + "type": "table column", + "column name": "day100_pasi_score", + "data type": "numeric", + "description": "The PASI score calculated at day 100 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72 (null if not enough time has passed)", + "sample values": [2.1, 3.5, 1.2, 5.8, 2.6], + "synonyms": ["PASI on day 100"] + }, + { + "name": "day7_tewl", + "type": "table column", + "column name": "day7_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 7 after treatment started, which is a measure of skin hydration", + "sample values": [18.2, 20.1, 16.5, 22.4], + "synonyms": ["TEWL on day 7"] + }, + { + "name": "day30_tewl", + "type": "table column", + "column name": "day30_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 30 after treatment started, which is a measure of skin hydration, in n g/m^2/h (null if not enough time has passed)", + "sample values": [15.6, 17.2, 14.0, 19.1], + "synonyms": ["TEWL on day 30"] + }, + { + "name": "day100_tewl", + "type": "table column", + 
"column name": "day100_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 100 after treatment started, which is a measure of skin hydration, in n g/m^2/h (null if not enough time has passed)", + "sample values": [12.1, 13.5, 10.8, 15.2], + "synonyms": ["TEWL on day 100"] + }, + { + "name": "day7_itch_vas", + "type": "table column", + "column name": "day7_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 7 after treatment started, which is a measure of itch severity from 0 to 100", + "sample values": [60, 70, 55, 80, 65], + "synonyms": ["VAS on day 7"] + }, + { + "name": "day30_itch_vas", + "type": "table column", + "column name": "day30_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 30 after treatment started, which is a measure of itch severity from 0 to 100 (null if not enough time has passed)", + "sample values": [40, 50, 35, 60, 45], + "synonyms": ["VAS on day 30"] + }, + { + "name": "day100_itch_vas", + "type": "table column", + "column name": "day100_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 100 after treatment started, which is a measure of itch severity from 0 to 100 (null if not enough time has passed)", + "sample values": [20, 30, 15, 40, 25], + "synonyms": ["VAS on day 100"] + }, + { + "name": "day7_hfg", + "type": "table column", + "column name": "day7_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 7 after treatment started, on a scale from 0 to 5", + "sample values": [1.5, 1.0, 2.0, 0.5, 1.2], + "synonyms": ["HFG on day 7"] + }, + { + "name": "day30_hfg", + "type": "table column", + "column name": "day30_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 30 after treatment started, on a scale from 0 to 5 (null if not enough time has passed)", + "sample 
values": [2.5, 2.0, 3.0, 1.5, 2.2], + "synonyms": ["HFG on day 30"] + }, + { + "name": "day100_hfg", + "type": "table column", + "column name": "day100_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 100 after treatment started, on a scale from 0 to 5 (null if not enough time has passed)", + "sample values": [4.0, 3.5, 4.5, 3.0, 3.8], + "synonyms": ["HFG on day 100"] + } + ], + "description": "The recorded outcomes of treatments, which can include the same treatment at different points in time if not enough time has elapsed to record the complete outcome for a treatment", + "synonyms": ["results", "treatment evaluations", "assessments"] + }, + { + "name": "concomitant_meds", + "type": "simple table", + "table path": "main.concomitant_meds", + "unique properties": ["_id", ["treatment_id", "medicine_name"]], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "The unique identifier for each concomitant medication record in the system", + "sample values": [1, 5, 12, 14, 15], + "synonyms": ["concomitant drug key", "concomitant drug id"] + }, + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id of the treatment that the concomitant medication record is related to", + "sample values": [1, 4, 8, 11, 13, 14], + "synonyms": [] + }, + { + "name": "medicine_name", + "type": "table column", + "column name": "med_name", + "data type": "string", + "description": "The name of the medicine being taken by the patient as a concomitant drug", + "sample values": ["Acetaminophen", "Ibuprofen", "Loratadine", "Multivitamin", "Calcium supplement", "Vitamin D", "Hydrocortisone cream"], + "synonyms": ["concomitant drug name"] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_dt", + "data type": "string", + "description": "Date the concomitant drug 
treatment was started", + "synonyms": ["concomitant drug start date", "first date"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_dt", + "data type": "string", + "description": "Date the concomitant drug treatment finished (null if ongoing)", + "synonyms": ["last date", "concomitant drug end date"] + }, + { + "name": "dose_amount", + "type": "table column", + "column name": "dose_amt", + "data type": "numeric", + "description": "The amount of the medicine taken per dose, where the unit is specified in the dose_unit property", + "sample values": [500, 200, 10, 1, 50], + "synonyms": ["amount of dose", "quantity per dose", "dose size"] + }, + { + "name": "dose_unit", + "type": "table column", + "column name": "dose_unit", + "data type": "string", + "description": "The unit used by dose_amount", + "sample values": ["mg", "g", "tablet", "IU"], + "synonyms": [] + }, + { + "name": "dose_frequency", + "type": "table column", + "column name": "freq_hrs", + "data type": "numeric", + "description": "The frequency of the dose in hours (null if one-time use)", + "sample values": [6, 24, 8, 12], + "synonyms": ["gap between doses", "administration hour intervals", "time between doses"] + } + ], + "description": "Records of medications that are taken by patients in addition to the treatment they are receiving during their treatment", + "synonyms": ["additional drugs", "ancillary medications", "concomitant drugs", "accompanying medications", "concurrent drugs"] + }, + { + "name": "adverse_events", + "type": "simple table", + "table path": "main.adverse_events", + "unique properties": ["_id", "treatment_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "The unique identifier for each adverse event record in the system", + "sample values": [1, 3, 5, 7, 8], + "synonyms": ["adverse event key", "adverse event id"] + }, + { + "name": "treatment_id", + "type": "table 
column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id for the treatment that the adverse event occurred during", + "sample values": [2, 5, 9, 11, 14], + "synonyms": ["treatment key", "treatment id"] + }, + { + "name": "reported_date", + "type": "table column", + "column name": "reported_dt", + "data type": "datetime", + "description": "The date the adverse event was reported", + "synonyms": ["incident date", "date of reaction"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "Descriptor of the adverse event, which can be a single word or a phrase", + "sample values": ["Mild injection site reaction", "Headache, nausea", "Severe allergic reaction, hospitalization required", "Diarrhea"], + "synonyms": [] + } + ], + "description": "All records of adverse events that have been reported in the system related to a treatment", + "synonyms": ["incidents during treatment", "adverse reactions", "side effects"] + } + ], + "relationships": [ + { + "name": "doctor", + "type": "simple join", + "parent collection": "treatments", + "child collection": "doctors", + "singular": true, + "always matches": true, + "keys": { + "doc_id": [ + "doc_id" + ] + }, + "description": "The doctor who administered the treatment", + "synonyms": ["medical professional", "treatment administrator", "prescribing doctor"] + }, + { + "name": "prescribed_treatments", + "type": "reverse", + "original parent": "treatments", + "original property": "doctor", + "singular": false, + "always matches": false, + "description": "The treatments prescribed/administered by the doctor", + "synonyms": ["regimens", "treatment plans", "medical prescriptions"] + }, + { + "name": "patient", + "type": "simple join", + "parent collection": "treatments", + "child collection": "patients", + "singular": true, + "always matches": true, + "keys": { + "patient_id": [ + "patient_id" + ] + }, + "description": 
"The patient who the treatment was administered to", + "synonyms": ["trial subject", "participant", "client", "drug recipient"] + }, + { + "name": "treatments_received", + "type": "reverse", + "original parent": "treatments", + "original property": "patient", + "singular": false, + "always matches": false, + "description": "The drug treatment regimens the patient has been a part of", + "synonyms": ["Drug regimens", "medical treatments", "therapies", "treatment history"] + }, + { + "name": "drug", + "type": "simple join", + "parent collection": "treatments", + "child collection": "drugs", + "singular": true, + "always matches": true, + "keys": { + "drug_id": [ + "drug_id" + ] + }, + "description": "The drug being used as part of the treatment", + "synonyms": ["medication", "pharmaceutical", "drug product", "therapy"] + }, + { + "name": "treatments_used_in", + "type": "reverse", + "original parent": "treatments", + "original property": "drug", + "singular": false, + "always matches": false, + "description": "The treatments the drug was used in", + "synonyms": ["therapies", "uses", "regimens", "prescriptions"] + }, + { + "name": "diagnosis", + "type": "simple join", + "parent collection": "treatments", + "child collection": "diagnoses", + "singular": true, + "always matches": true, + "keys": { + "diagnosis_id": [ + "_id" + ] + }, + "description": "The diagnosis associated with the treatment which the drug is treating", + "synonyms": ["medical condition", "associated condition", "cause of treatment"] + }, + { + "name": "treatments_for", + "type": "reverse", + "original parent": "treatments", + "original property": "diagnosis", + "singular": false, + "always matches": false, + "description": "The drug treatment regimens targeted at a patient with this diagnosis", + "synonyms": [] + }, + { + "name": "outcome_records", + "type": "simple join", + "parent collection": "treatments", + "child collection": "outcomes", + "singular": false, + "always matches": false, + "keys": { 
+ "treatment_id": [ + "treatment_id" + ] + }, + "description": "The reported outcomes of the treatment, which can include the same treatment at different points in time if not enough time has elapsed to record the complete outcome for a treatment", + "synonyms": ["quality reports", "evaluations", "assessments"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "outcome_records", + "singular": true, + "always matches": true, + "description": "The treatment that the outcome record is reporting on", + "synonyms": ["regimen", "prescription", "medical trial"] + }, + { + "name": "concomitant_meds", + "type": "simple join", + "parent collection": "treatments", + "child collection": "concomitant_meds", + "singular": false, + "always matches": false, + "keys": { + "treatment_id": [ + "treatment_id" + ] + }, + "description": "The ancillary medications that were taken by a patient in addition to the treatment they are receiving during their treatment", + "synonyms": ["additional drugs", "ancillary medications", "concomitant drugs", "accompanying medications", "concurrent drugs"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "concomitant_meds", + "singular": true, + "always matches": true, + "description": "The treatment that the administration of the concomitant medication was related to", + "synonyms": ["medical trial", "regimen", "main prescription"] + }, + { + "name": "adverse_events", + "type": "simple join", + "parent collection": "treatments", + "child collection": "adverse_events", + "singular": false, + "always matches": false, + "keys": { + "treatment_id": [ + "treatment_id" + ] + }, + "description": "Any incident events recorded in association with the treatment", + "synonyms": ["side effects", "medical incidents"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "adverse_events", 
+ "singular": true, + "always matches": true, + "description": "The treatment that the adverse event occurred during", + "synonyms": ["regimen", "medical trial", "prescription"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "Ewallet", + "version": "V2", + "collections": [ + { + "name": "users", + "type": "simple table", + "table path": "main.users", + "unique properties": ["uid", "username"], + "properties": [ + { + "name": "uid", + "type": "table column", + "column name": "uid", + "data type": "numeric", + "description": "The unique identifier for each user in the system", + "sample values": [1, 3, 5, 10, 11], + "synonyms": ["user id", "user key"] + }, + { + "name": "username", + "type": "table column", + "column name": "username", + "data type": "string", + "description": "The username for the user", + "sample values": ["john_doe", "michael_brown", "bizuser", "huang2143"], + "synonyms": ["name", "account profile"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email for the user", + "sample values": ["john.doe@email.com", "lisa.jones@email.com", "customerserv@shopsmart.biz", "huang2143@example.com", "contact@business.co"], + "synonyms": ["email address", "contact email"] + }, + { + "name": "phone_number", + "type": "table column", + "column name": "phone_number", + "data type": "string", + "description": "The phone number for the user (null if not provided), which always starts with a `+` and the country code and does not have dashes", + "sample values": ["+1234567890", "+9876543210", "+8091017161", "+8612345678901", "+6123456789"], + "synonyms": ["phone", "mobile number", "contact number"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the user account was created", + "synonyms": ["account creation 
date", "registration datetime"] + }, + { + "name": "last_login_at", + "type": "table column", + "column name": "last_login_at", + "data type": "datetime", + "description": "The last time the user logged into their account (null if never logged in)", + "synonyms": ["most recent login datetime", "latest access timestamp"] + }, + { + "name": "user_type", + "type": "table column", + "column name": "user_type", + "data type": "string", + "description": "The type of user, which can be either `individual` or `business`", + "sample values": ["individual", "business"], + "synonyms": ["category", "classification"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the user account, which can be either `active`, `inactive`, `deleted` or `suspended`", + "sample values": ["active", "inactive", "suspended", "deleted"], + "synonyms": ["account state", "activity", "user status"] + }, + { + "name": "country", + "type": "table column", + "column name": "country", + "data type": "string", + "description": "The two-letter country code for the country of the user", + "sample values": ["US", "CA", "FR", "GB", "AU"], + "synonyms": ["nation"] + }, + { + "name": "address_billing", + "type": "table column", + "column name": "address_billing", + "data type": "string", + "description": "The billing address for the user, including street address, city, sometimes province/country, and postal code", + "sample values": ["123 Main St, Anytown US 12345", "456 Oak Rd, Toronto ON M1M2M2", "12 Rue Baptiste, Paris 75001", "12 Mardon Rd, Wellington 6012"], + "synonyms": ["billing location", "billing street address"] + }, + { + "name": "address_delivery", + "type": "table column", + "column name": "address_delivery", + "data type": "string", + "description": "The delivery address for the user, including street address, city, sometimes province/country, and postal code, or null if not provided", + "sample values": 
["123 Main St, Anytown US 12345", "25 London Road, Manchester M12 4XY", "19 Smith St, Brunswick VIC 3056"], + "synonyms": [] + }, + { + "name": "kyc_status", + "type": "table column", + "column name": "kyc_status", + "data type": "string", + "description": "The KYC status, which can be either `approved`, `pending`, or `rejected`", + "sample values": ["approved", "pending", "rejected"], + "synonyms": ["approval status", "verification status"] + }, + { + "name": "kyc_verified_at", + "type": "table column", + "column name": "kyc_verified_at", + "data type": "datetime", + "description": "The date when the kyc status became verified (null if not provided)", + "synonyms": ["date of approval", "date of verification"] + } + ], + "description": "Every user in the system, which can be either an individual or a business", + "synonyms": ["accounts", "user profiles", "user accounts", "clients", "consumers"] + }, + { + "name": "merchants", + "type": "simple table", + "table path": "main.merchants", + "unique properties": ["mid", "name"], + "properties": [ + { + "name": "mid", + "type": "table column", + "column name": "mid", + "data type": "numeric", + "description": "Unique identifier for each merchant in the system", + "sample values": [1, 5, 10, 12, 15], + "synonyms": ["merchant key", "merchant id"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the merchant", + "sample values": ["TechMart", "FitLifeGear", "UrbanDining", "KidzPlayhouse", "GameRush"], + "synonyms": ["merchant name", "business name", "company name"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "The detailed description of what the merchant does or sells", + "sample values": ["Leading electronics retailer", "SaaS productivity tools for developers", "Cosmetics and beauty supplies", "Books and reading accessories"], + "synonyms": ["business 
industry description", "merchant product types", "market segment description", "company description"] + }, + { + "name": "website_url", + "type": "table column", + "column name": "website_url", + "data type": "string", + "description": "The URL to the merchant's website", + "sample values": ["https://fitlifegear.com", "https://hometechsolutions.net", "https://bookworms.co.uk", "https://www.zenhomegoods.com'"], + "synonyms": ["website link", "web address"] + }, + { + "name": "logo_url", + "type": "table column", + "column name": "logo_url", + "data type": "string", + "description": "The URL to the merchant's logo image", + "sample values": ["https://www.techmart.com/logo.png", "https://www.fashiontrend.com/logo.png", "https://gamerush.co/gr-logo.png", "https://kidzplayhouse.com/logo.png"], + "synonyms": ["image link", "logo image address"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the merchant was registered in the system", + "synonyms": ["merchant registration date", "merchant creation date"] + }, + { + "name": "country", + "type": "table column", + "column name": "country", + "data type": "string", + "description": "The two-letter capitalized country code for the country of the merchant", + "sample values": ["US", "CA", "IT", "FR", "UK"], + "synonyms": ["nation"] + }, + { + "name": "state", + "type": "table column", + "column name": "state", + "data type": "string", + "description": "The state for the merchant within the country, or null if not applicable", + "sample values": ["California", "Ontario", "Catalonia", "New South Wales", "British Columbia"], + "synonyms": ["province"] + }, + { + "name": "city", + "type": "table column", + "column name": "city", + "data type": "string", + "description": "The city for the merchant's location within the country", + "sample values": ["Los Angeles", "Berlin", "Rome", "Vancouver", "Sydney"] + }, + { + "name": 
"postal_code", + "type": "table column", + "column name": "postal_code", + "data type": "string", + "description": "The postal/zip code of the merchant's location within the city", + "sample values": ["90011", "M5V2J2", "1010", "94105"], + "synonyms": ["zip code"] + }, + { + "name": "address", + "type": "table column", + "column name": "address", + "data type": "string", + "description": "The full address of the merchant, including street address, city, sometimes province, and postal code", + "sample values": ["645 Wilshire Blvd, Los Angeles CA 90011", "159 Franklin St, Melbourne VIC 3004", "350 Mission St, San Francisco CA 94105", "35 Rue du Faubourg Saint-Antoine, 75011 Paris", "Passeig de Gracia 35, Barcelona 08003"], + "synonyms": ["street address", "full location"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the merchant account, which can be either `active`, `inactive` or `suspended`", + "sample values": ["active", "inactive", "suspended"], + "synonyms": ["account state", "activity", "merchant status"] + }, + { + "name": "category", + "type": "table column", + "column name": "category", + "data type": "string", + "description": "The blanket category/industry/market segment that the merchant is in", + "sample values": ["retail (hardware)", "Food & Dining", "Travel & Hospitality", "Retail", "Business Services"], + "synonyms": ["industry", "high-level market segment"] + }, + { + "name": "sub_category", + "type": "table column", + "column name": "sub_category", + "data type": "string", + "description": "The more specific category/industry/market segment that the merchant is in within the category", + "sample values": ["Electronics", "Sporting GOods", "Restaurants", "Accommodation", "Toys & Games", "Books", "Pets", "Groceries"], + "synonyms": ["sub-industry", "specific market segment"] + }, + { + "name": "merchant_category_code", + "type": "table column", + "column 
name": "mcc", + "data type": "numeric", + "description": "The category code for the merchant, which is a 4-digit number that identifies the type of business", + "sample values": [5734, 5977, 7011, 5719, 5732], + "synonyms": [] + }, + { + "name": "contact_name", + "type": "table column", + "column name": "contact_name", + "data type": "string", + "description": "The full name of the individual who is the primary contact for the merchant", + "sample values": ["John Jacobs", "Daniel Lee", "Marco Rossi", "Sophia Turner"], + "synonyms": ["contact person", "contact individual"] + }, + { + "name": "contact_email", + "type": "table column", + "column name": "contact_email", + "data type": "string", + "description": "The email of the contact person for the merchant", + "sample values": ["jjacobs@techmart.com", "dlee@greengourmet.com", "ebrown@hometechsolutions.net", "sturner@bookworms.co.uk", "mschmidt@codesuite.io", "agarcia@handycraft.store"], + "synonyms": ["contact email", "contact person email"] + }, + { + "name": "contact_phone", + "type": "table column", + "column name": "contact_phone", + "data type": "string", + "description": "The phone number of the contact person, which always starts with a `+` and the country code and does not have dashes", + "sample values": ["+15551234567", "+49301234567", "+441612345678", "+61298765432", "+442078912345"], + "synonyms": ["contact phone number", "contact person phone"] + } + ], + "description": "The merchant accounts registered in the system", + "synonyms": ["companies", "businesses", "vendors", "retailers", "stores"] + }, + { + "name": "coupons", + "type": "simple table", + "table path": "main.coupons", + "unique properties": ["cid"], + "properties": [ + { + "name": "cid", + "type": "table column", + "column name": "cid", + "data type": "numeric", + "description": "The unique identifier for each coupon in the system", + "sample values": [1, 4, 5, 7, 10], + "synonyms": ["coupon key", "coupon id"] + }, + { + "name": 
"merchant_id", + "type": "table column", + "column name": "merchant_id", + "data type": "numeric", + "description": "The id of the merchant who issues the coupon", + "sample values": [2, 5, 7, 8, 9], + "synonyms": ["merchant key", "merchant id", "issuing business id"] + }, + { + "name": "code", + "type": "table column", + "column name": "code", + "data type": "string", + "description": "The code used to enter the coupon", + "sample values": ["TECH20", "DINEDISCOUNT", "HOME15", "GLOWUP", "GAMERALERT"], + "synonyms": ["coupon code", "discount code", "promo code"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "A verbal description of what the coupon is for", + "sample values": ["20% off tech and electronics", "Buy 2 get 1 free on cosmetics'", "Get 25% off accessories", "$10 off $75+ purchase"], + "synonyms": [] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_date", + "data type": "datetime", + "description": "The date when the coupon first became valid", + "synonyms": ["available since", "valid from"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_date", + "data type": "datetime", + "description": "The date when the coupon is no longer valid", + "synonyms": ["available until", "valid until", "expiration date"] + }, + { + "name": "discount_type", + "type": "table column", + "column name": "discount_type", + "data type": "string", + "description": "The category of the coupon, which can be either `percentage` or `fixed_amount`", + "sample values": ["percentage", "fixed_amount"], + "synonyms": ["classification", "category"] + }, + { + "name": "discount_value", + "type": "table column", + "column name": "discount_value", + "data type": "numeric", + "description": "The value of the discount, which is the percentage off if the discount type is `percentage`, or the number of dollars off if `fixed_amount`", + "sample 
values": [20.0, 30.0, 10.0, 15.0, 25.0], + "synonyms": ["dollars off", "percentage off", "amount", "size"] + }, + { + "name": "min_purchase_amount", + "type": "table column", + "column name": "min_purchase_amount", + "data type": "numeric", + "description": "The minimum amount of money that must be spent on a purchase in USD to be eligible for the coupon (null if not applicable)", + "sample values": [100.0, 50.0, 1000.0, 150.0], + "synonyms": ["minimum spend", "minimum purchase", "minimum order price"] + }, + { + "name": "max_discount_amount", + "type": "table column", + "column name": "max_discount_amount", + "data type": "numeric", + "description": "The maximum amount of money that can be saved using the coupon in USD (null if not applicable)", + "sample values": [50.0, 300.0, 10.0, 15.0], + "synonyms": ["promotion limit", "discount cap", "maximum savings"] + }, + { + "name": "redemption_limit", + "type": "table column", + "column name": "redemption_limit", + "data type": "numeric", + "description": "The maximum number of times the coupon can be redeemed before expiring/becoming invalid (null if not applicable)", + "sample values": [500, 1000, 200, 300, 750], + "synonyms": ["maximum number of uses", "redemption cap", "usage limit"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the coupon, which can be either `active`, `inactive`, or `expired`", + "sample values": ["active", "inactive", "expired"], + "synonyms": ["availability", "usability", "activity state"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "Timestamp when the coupon record was created in the system", + "synonyms": ["creation datetime"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "Timestamp when the coupon record was last updated in 
the system (null if never updated after creation)", + "synonyms": ["update datetime"] + } + ], + "description": "The records of coupons that have been issued by merchants in the system", + "synonyms": ["discounts", "promotions", "special offers"] + }, + { + "name": "transactions", + "type": "simple table", + "table path": "main.wallet_transactions_daily", + "unique properties": ["txid", "transaction_ref"], + "properties": [ + { + "name": "txid", + "type": "table column", + "column name": "txid", + "data type": "numeric", + "description": "The unique identifier for each transaction in the system (the original transaction & coupon applied are different rows with different values of this key)", + "sample values": [1, 3, 17, 21, 26], + "synonyms": ["transaction key", "transaction id"] + }, + { + "name": "sender_id", + "type": "table column", + "column name": "sender_id", + "data type": "numeric", + "description": "The id of the key of the individual who sent the transaction, which is either the id of a user or a merchant, depending on sender_type", + "sample values": [1, 3, 8, 4, 10], + "synonyms": [] + }, + { + "name": "sender_type", + "type": "table column", + "column name": "sender_type", + "data type": "numeric", + "description": "Which type of individual sent the transaction: 0 for user (meaning sender_id is a uid from users) or 1 for merchant (meaning sender_id is a mid from merchants)", + "sample values": [0, 1], + "synonyms": ["sent by merchant"] + }, + { + "name": "receiver_id", + "type": "table column", + "column name": "receiver_id", + "data type": "numeric", + "description": "The id of the key of the individual who received the transaction, which is either the id of a user or a merchant, depending on receiver_type", + "sample values": [1, 9, 10, 2, 5], + "synonyms": [] + }, + { + "name": "receiver_type", + "type": "table column", + "column name": "receiver_type", + "data type": "numeric", + "description": "Which type of individual received the transaction: 
0 for user (meaning receiver_id is a uid from users) or 1 for merchant (meaning receiver_id is a mid from merchants)", + "sample values": [0, 1], + "synonyms": ["received by merchant"] + }, + { + "name": "amount", + "type": "table column", + "column name": "amount", + "data type": "numeric", + "description": "The dollar amount of the transaction, which is the total amount for a record without a coupon applied, or the amount saved/rebated when the coupon is applied", + "sample values": [99.99, 20.0, 16.0, 125.50, 10.0], + "synonyms": [] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the transaction, which can be either `success`, `failed`, `refunded`, or `pending`", + "sample values": ["success", "failed", "pending", "refunded"], + "synonyms": ["outcome", "result", "transaction status"] + }, + { + "name": "transaction_type", + "type": "table column", + "column name": "type", + "data type": "string", + "description": "The means by which the transaction was made, which can be either `credit` or `debit`", + "sample values": ["credit", "debit"], + "synonyms": ["payment method", "credit or debit"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "The category of transaction, which either says what was purchased or 'Coupon discount' if the transaction is a coupon discount being applied to a previous transaction in the system", + "sample values": ["Online purchase", "Coupon discount", "Product purchase", "Order #438721", "Villa rental deposit", "Refund on order #1234"], + "synonyms": ["description of purchase", "purchase subject"] + }, + { + "name": "coupon_id", + "type": "table column", + "column name": "coupon_id", + "data type": "numeric", + "description": "The id of the coupon being used in the transaction, or null if the transaction is not a coupon discount", + "sample values": [1, 3, 9, 10], + 
"synonyms": ["coupon key", "coupon id"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the transfer was created in the system", + "synonyms": ["creation datetime"] + }, + { + "name": "completed_at", + "type": "table column", + "column name": "completed_at", + "data type": "datetime", + "description": "The timestamp when the transfer was completed (null if not completed yet)", + "synonyms": ["completion datetime"] + }, + { + "name": "transaction_ref", + "type": "table column", + "column name": "transaction_ref", + "data type": "string", + "description": "Randomly generated uuid4 for users' reference", + "sample values": ["ad154bf7-8185-4230-a8d8-3ef59b4e0012", "kd454bf7-428d-eig2-a8d8-3ef59b4e0012", "a7659c81-0cd0-4635-af6c-cf68d2c15ab2'"], + "synonyms": ["transaction uuid", "reference number"] + }, + { + "name": "gateway_name", + "type": "table column", + "column name": "gateway_name", + "data type": "string", + "description": "The portal through which the transaction was made, which can be either 'Stripe', 'PayPal', 'Checkout.com', 'Braintree' or 'Adyen'", + "sample values": ["Stripe", "PayPal", "Checkout.com", "Braintree", "Adyen"], + "synonyms": ["payment gateway", "payment processor", "payment portal"] + }, + { + "name": "gateway_ref", + "type": "table column", + "column name": "gateway_ref", + "data type": "string", + "description": "A reference number generated by the payment gateway for the transaction, which is used to track the transaction in the payment processor's system", + "sample values": ["tx_123abc456def", "rfnd_xkt521", "sub_pjj908", "stripe_ref_11_1", "paypal_ref_12_1"], + "synonyms": ["gateway reference number", "payment processor reference"] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The unique identifier for the device used to make the transaction", + 
"sample values": ["mobile_8fh2k1", "web_8902wknz", "web_zld22f", "device_11_1", "mobile_1av8p0"], + "synonyms": ["device key", "device identifier", "device number"] + }, + { + "name": "ip_address", + "type": "table column", + "column name": "ip_address", + "data type": "string", + "description": "The IP address of the device used to make the transaction", + "sample values": ["199.59.148.201", "199.59.148.201", "70.121.39.25", "8.26.53.165"], + "synonyms": ["IP"] + }, + { + "name": "user_agent", + "type": "table column", + "column name": "user_agent", + "data type": "string", + "description": "The user agent used to make the transaction, which is a string that identifies the browser, operating system, and device type of the device used to make the transaction", + "sample values": ["Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...", "Mozilla/5.0 (Linux; Android 13; SM-S901B) ...", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ..."], + "synonyms": ["device information", "browser information", "device details"] + } + ], + "description": "Every transaction that has occurred in the system, where the source can be either a user or a merchant and the destination can be either a user or a merchant. 
If a coupon is used, there will be two identical transaction records, one for the original transaction (where the coupon id is null) and one for the coupon discount applied (where the coupon id is provided)", + "synonyms": ["transfers", "payments", "purchases"] + }, + { + "name": "user_balances", + "type": "simple table", + "table path": "main.wallet_user_balance_daily", + "unique properties": [["user_id", "updated_at"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id of the user whose account balance is being recorded", + "sample values": [1, 4, 9, 10, 5], + "synonyms": ["user id", "user key"] + }, + { + "name": "balance", + "type": "table column", + "column name": "balance", + "data type": "numeric", + "description": "The balance of the user's account in USD on the date of the record", + "sample values": [525.80, -725.55, -55.99, -559.98, 0.00, 2733.92], + "synonyms": ["amount in account", "user balance", "account value"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "The timestamp when the user's balance was recorded", + "synonyms": ["update datetime", "balance record date"] + } + ], + "description": "The records of users' account balances in the system, which can include negative balances and multiple records for the same user made at different points in time", + "synonyms": ["user balance histories", "user balance records", "user account balances"] + }, + { + "name": "merchant_balances", + "type": "simple table", + "table path": "main.wallet_merchant_balance_daily", + "unique properties": [["merchant_id", "updated_at"]], + "properties": [ + { + "name": "merchant_id", + "type": "table column", + "column name": "merchant_id", + "data type": "numeric", + "description": "The id of the merchant whose account balance is being recorded", + "sample values": [2, 4, 5, 9, 1], + 
"synonyms": ["merchant key", "merchant id"] + }, + { + "name": "balance", + "type": "table column", + "column name": "balance", + "data type": "numeric", + "description": "The balance of the user's account in USD on the date of the record", + "sample values": [3897.99, 29.95, 0.00, 89.99], + "synonyms": ["amount in account", "merchant balance", "account value"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "The timestamp when the merchant's balance was recorded", + "synonyms": ["update datetime", "balance record date"] + } + ], + "description": "The records of merchants' account balances in the system, which can include negative balances and multiple records for the same merchant made at different points in time", + "synonyms": ["merchant balance histories", "merchant balance records", "merchant account balances"] + }, + { + "name": "notifications", + "type": "simple table", + "table path": "main.notifications", + "unique properties": ["notif_id"], + "properties": [ + { + "name": "notif_id", + "type": "table column", + "column name": "_id", + "data type": "numeric", + "description": "The unique identifier for each notification in the system", + "sample values": [1, 5, 10, 14, 16], + "synonyms": ["notification key", "notification id"] + }, + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id for the user who received the notification", + "sample values": [3, 5, 9, 10], + "synonyms": ["user key", "user id"] + }, + { + "name": "message", + "type": "table column", + "column name": "message", + "data type": "string", + "description": "The message contained in the notification sent to the user", + "sample values": ["Your order #123abc has been shipped!", "Your CodeSuite subscription will renew on 7/1", "Here''s $10 to start your glow up!'", "Reminder: Your FitLife membership expires in 7 days", "An order from 
UrbanDining was unsuccessful", "Weekend Flash Sale: 25% off all activewear!"], + "synonyms": ["contents", "text", "description"] + }, + { + "name": "notification_type", + "type": "table column", + "column name": "type", + "data type": "string", + "description": "The type of notification message, which can be either `transaction`, `promotion`, or `general`", + "sample values": ["transaction", "promotion", "general"], + "synonyms": ["message category", "alert classification"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the notification, which can be either `read`, `unread`, or `archived`", + "sample values": ["read", "unread", "archived"], + "synonyms": ["has been read"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the notification was created", + "synonyms": ["creation datetime", "notification creation date"] + }, + { + "name": "read_at", + "type": "table column", + "column name": "read_at", + "data type": "datetime", + "description": "The timestamp when the notification was read (null if not read yet)", + "synonyms": ["datetime read"] + }, + { + "name": "device_type", + "type": "table column", + "column name": "device_type", + "data type": "string", + "description": "The type of device/medium the notification was sent through, which can be either `mobile_app`, `email`, `sms`, or `web_app`", + "sample values": ["mobile_app", "email", "sms", "web_app"], + "synonyms": [] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The id of the device the notification was sent to, or null if the device type does not have device ids", + "sample values": ["mobile_8fh2k1", "web_d8180kaf", "mobile_8fh2k1"], + "synonyms": [] + }, + { + "name": "action_url", + "type": "table column", + "column name": 
"action_url", + "data type": "string", + "description": "The URL included in hte notification that the user can click on to take action; can be external https or deeplink url within the app", + "sample values": ["app://orders/123abc", "https://zenhomesurvey.com/order/c51e10d1", "https://kidzplayhouse.com/new-arrivals", "https://techmart.com/promo/TECH20"], + "synonyms": ["link", "notification hyperlink"] + } + ], + "description": "All notifications sent to users in the system, such as alerts, reminders, and promotional messages", + "synonyms": ["alerts", "messages"] + }, + { + "name": "user_sessions", + "type": "simple table", + "table path": "main.user_sessions", + "unique properties": [["user_id", "session_start"], ["user_id", "session_end"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id of the user who the session belongs to", + "sample values": [1, 2, 3, 8, 10], + "synonyms": ["user id", "user key"] + }, + { + "name": "session_start", + "type": "table column", + "column name": "session_start_ts", + "data type": "datetime", + "description": "The timestamp when the user session started", + "synonyms": ["beginning timestamp", "session initialization datetime"] + }, + { + "name": "session_end", + "type": "table column", + "column name": "session_end_ts", + "data type": "datetime", + "description": "The timestamp when the user session ended", + "synonyms": ["ending timestamp", "session termination datetime"] + }, + { + "name": "device_type", + "type": "table column", + "column name": "device_type", + "data type": "string", + "description": "The type of device the session was made with, which can be either `web_app` or `mobile_app`", + "sample values": ["web_app", "mobile_app"], + "synonyms": ["session device", "web or mobile"] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The id of 
the device the session was made with", + "sample values": ["web_d8180kaf", "mobile_g3mjfz", "web_zz91p44l", "web_8902wknz"], + "synonyms": [] + } + ], + "description": "The records of user sessions in the system, which can include multiple sessions for the same user made at different points in time, each session with a start/end timestamp", + "synonyms": ["system logs", "user activity logs", "user session records"] + }, + { + "name": "user_setting_snapshots", + "type": "simple table", + "table path": "main.user_setting_snapshot", + "unique properties": [["user_id", "snapshot_date"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id for the user whose settings are being recorded", + "sample values": [1, 2, 3, 5, 9], + "synonyms": ["user key", "user id"] + }, + { + "name": "snapshot_date", + "type": "table column", + "column name": "snapshot_date", + "data type": "datetime", + "description": "The date when the setting snapshot was taken", + "synonyms": ["date of setting update", "recorded date"] + }, + { + "name": "daily_transaction_limit", + "type": "table column", + "column name": "tx_limit_daily", + "data type": "numeric", + "description": "The cap in settings of how much the user account allows to be spent in a single day", + "sample values": [1000.0, 500.0, 50.0, 250.0], + "synonyms": ["daily spending cap"] + }, + { + "name": "monthly_transaction_limit", + "type": "table column", + "column name": "tx_limit_monthly", + "data type": "numeric", + "description": "The cap in settings of how much the user account allows to be spent in a single month", + "sample values": [5000.0, 500.0, 2000.0, 1000.0], + "synonyms": ["monthly spending cap"] + }, + { + "name": "membership_status", + "type": "table column", + "column name": "membership_status", + "data type": "numeric", + "description": "The member's status level within the system: 0 for bronze, 1 for silver, 2 for gold, 3 
for platinum, 4 for VIP", + "sample values": [0, 1, 2, 3, 4], + "synonyms": ["rank", "membership level", "membership tier"] + }, + { + "name": "password_hash", + "type": "table column", + "column name": "password_hash", + "data type": "string", + "description": "The hashed password of the user, which is used to verify the user's identity when they log in", + "sample values": ["bcryptHash($2yz9!&ka1)", "bcryptHash($2yz9!&ka1)", "bcryptHash(C0d3Rul3z!99)"], + "synonyms": [] + }, + { + "name": "api_key", + "type": "table column", + "column name": "api_key", + "data type": "string", + "description": "The user's API key, which is used to authenticate the user when they make API requests (null if not provided)", + "sample values": ["9d61c49b-8977-4914-a36b-80d1445e38fa", "6c03c175-9ac9-4854-b064-a3fff2c62e31"], + "synonyms": ["api token", "application programming interface key"] + }, + { + "name": "verified_devices", + "type": "table column", + "column name": "verified_devices", + "data type": "string", + "description": "A comma-separated list of the ids of the devices that have been verified for the user (null if not provided)", + "sample values": ["mobile_8fh2k1", "mobile_yjp08q, mobile_1av8p0", "web_k29qjd, mobile_x28qlj", "web_d8180kaf, mobile_q3mz8n"], + "synonyms": ["device list", "device ids"] + }, + { + "name": "verified_ips", + "type": "table column", + "column name": "verified_ips", + "data type": "string", + "description": "A comma-separated list of the IP addresses that have been verified for the user (null if not provided)", + "sample values": ["8.26.53.165, 68.85.32.201", "203.96.81.36", "192.168.0.1", "198.51.100.233, 70.121.39.25"], + "synonyms": ["IP addresses", "IP list"] + }, + { + "name": "mfa_enabled", + "type": "table column", + "column name": "mfa_enabled", + "data type": "bool", + "description": "Whether the user has enabled multi-factor authentication (MFA) for their account", + "synonyms": ["has mfa"] + }, + { + "name": "marketing_opt_in", + 
"type": "table column", + "column name": "marketing_opt_in", + "data type": "bool", + "description": "Whether the user has opted in to receive marketing communications", + "synonyms": ["allowed marketing"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the record was added to the system", + "synonyms": ["system entry date"] + } + ], + "description": "The records of user settings in the system, which can include multiple updates for the same user made at different points in time", + "synonyms": ["user settings history", "user settings records"] + } + ], + "relationships": [ + { + "name": "transactions_sent", + "type": "simple join", + "parent collection": "users", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "sender_id" + ] + }, + "description": "All transactions made where the user is the sender", + "synonyms": ["transactions from user"] + }, + { + "name": "sending_user", + "type": "reverse", + "original parent": "users", + "original property": "transactions_sent", + "singular": true, + "always matches": false, + "description": "The user who sent the transaction, if it was sent by a user (not found if it was sent by a merchant)", + "synonyms": ["source user", "originating user"] + }, + { + "name": "transactions_received", + "type": "simple join", + "parent collection": "users", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "receiver_id" + ] + }, + "description": "The transactions where the user was the recipient", + "synonyms": ["transactions to user"] + }, + { + "name": "receiving_user", + "type": "reverse", + "original parent": "users", + "original property": "transactions_received", + "singular": true, + "always matches": false, + "description": "The user who received the transaction, if it was received by a user (not found if 
it was received by a merchant)", + "synonyms": ["destination user", "receiving user account"] + }, + { + "name": "balances", + "type": "simple join", + "parent collection": "users", + "child collection": "user_balances", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The historical account balance for the user, which can include multiple updates over time", + "synonyms": ["historical account balance", "user balance history"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "balances", + "singular": true, + "always matches": true, + "description": "The user who the account balance belongs to", + "synonyms": ["customer"] + }, + { + "name": "notifications", + "type": "simple join", + "parent collection": "users", + "child collection": "notifications", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The notifications that have been sent to the user", + "synonyms": ["user notifications", "user alerts"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "notifications", + "singular": true, + "always matches": true, + "description": "The user who received the notification", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "sessions", + "type": "simple join", + "parent collection": "users", + "child collection": "user_sessions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The login sessions by the user", + "synonyms": ["user login sessions", "session history"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "sessions", + "singular": true, + "description": "The user who the login session information belongs to", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "setting_snapshots", + 
"type": "simple join", + "parent collection": "users", + "child collection": "user_setting_snapshots", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The setting snapshot history for the user, which can include multiple updates over time", + "synonyms": ["setting history", "user setting snapshots"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "setting_snapshots", + "singular": true, + "description": "The user who the setting snapshot belongs to", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "transactions_sent", + "type": "simple join", + "parent collection": "merchants", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "sender_id" + ] + }, + "description": "The transactions sent by the merchant", + "synonyms": ["merchant transactions sent", "merchant outgoing transactions"] + }, + { + "name": "sending_merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "transactions_sent", + "singular": true, + "always matches": false, + "description": "The merchant who sent the transaction, if it was sent by a merchant (not found if it was sent by a user)", + "synonyms": ["merchant source", "merchant origin"] + }, + { + "name": "transactions_received", + "type": "simple join", + "parent collection": "merchants", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "receiver_id" + ] + }, + "description": "The transactions received by the merchant", + "synonyms": ["merchant transactions received", "merchant incoming transactions"] + }, + { + "name": "receiving_merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "transactions_received", + "singular": true, + "always matches": false, + "description": "The merchant who received the transaction, if it 
was received by a merchant (not found if it was received by a user)", + "synonyms": ["merchant destination", "merchant recipient"] + }, + { + "name": "balances", + "type": "simple join", + "parent collection": "merchants", + "child collection": "merchant_balances", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "merchant_id" + ] + }, + "description": "The historical account balance for the merchant, which can include multiple updates over time", + "synonyms": ["historical account balance", "merchant balance history"] + }, + { + "name": "merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "balances", + "singular": true, + "always matches": true, + "description": "The merchant who the account balance information belongs to", + "synonyms": ["business", "company", "merchant account"] + }, + { + "name": "coupons", + "type": "simple join", + "parent collection": "merchants", + "child collection": "coupons", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "merchant_id" + ] + }, + "description": "The coupons issued by the merchant", + "synonyms": ["merchant coupons", "merchant discount codes"] + }, + { + "name": "merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "coupons", + "singular": true, + "always matches": true, + "description": "The merchant who issued the coupon", + "synonyms": ["issuing company", "business"] + }, + { + "name": "transaction_used_in", + "type": "simple join", + "parent collection": "coupons", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "cid": [ + "coupon_id" + ] + }, + "description": "The transactions that the coupon was used in", + "synonyms": ["uses", "transactions with coupon"] + }, + { + "name": "coupon", + "type": "reverse", + "original parent": "coupons", + "original property": "transaction_used_in", + "singular": true, + "always matches": false, + "description": 
"The coupon that was used in the transaction, if one exists", + "synonyms": ["coupon used", "discount used"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "Academic", + "version": "V2", + "collections": [ + { + "name": "authors", + "type": "simple table", + "table path": "main.author", + "unique properties": [ + "author_id" + ], + "properties": [ + { + "name": "author_id", + "type": "table column", + "column name": "aid", + "data type": "numeric", + "description": "Unique identifier for each author", + "sample values": [1, 3, 5], + "synonyms": ["author id", "author identification"] + }, + { + "name": "homepage", + "type": "table column", + "column name": "homepage", + "data type": "string", + "description": "The hostname used for the author's website", + "sample values": ["www.larry.com", "www.noam.com"], + "synonyms": ["author url", "author website"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name and lastname of the author", + "sample values": ["Larry Summers", "Noam Shazeer"], + "synonyms": ["full name"] + }, + { + "name": "organization_id", + "type": "table column", + "column name": "oid", + "data type": "numeric", + "description": "The id of the organization the author is related to", + "sample values": [1, 2, 5], + "synonyms": ["oid", "org_id"] + } + ], + "description": "Contains details about the authors, including their id, name, homepage, and organization id.", + "synonyms": ["writer", "researcher"] + }, + { + "name": "citations", + "type": "simple table", + "table path": "main.cite", + "unique properties": [ + ["cited", + "citing"] + ], + "properties": [ + { + "name": "cited", + "type": "table column", + "column name": "cited", + "data type": "numeric", + "description": "The id of the publication being referenced or cited.", + "sample values": [1, 2, 4], + "synonyms": ["reference"] + }, + { + 
"name": "citing", + "type": "table column", + "column name": "citing", + "data type": "numeric", + "description": "The id of the publication that is doing the referencing (the source paper that contains the citation).", + "sample values": [1, 2, 3], + "synonyms": ["source paper"] + } + ], + "description": "Records of citation relationships between publications.", + "synonyms": ["citation records", "references"] + }, + { + "name": "conferences", + "type": "simple table", + "table path": "main.conference", + "unique properties": [ + "conference_id" + ], + "properties": [ + { + "name": "conference_id", + "type": "table column", + "column name": "cid", + "data type": "numeric", + "description": "Unique identifier for the conference.", + "sample values": [1, 2, 3], + "synonyms": ["conference id"] + }, + { + "name": "homepage", + "type": "table column", + "column name": "homepage", + "data type": "string", + "description": "The URL or homepage of the conference.", + "sample values": ["www.isa.com", "www.icml.com"], + "synonyms": ["conference website"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The full name or abbreviation of the conference.", + "sample values": ["ISA", "ICML"], + "synonyms": ["conference name", "abbreviation"] + } + ], + "description": "Contains details about academic conferences.", + "synonyms": ["meeting", "symposium"] + }, + { + "name": "domains", + "type": "simple table", + "table path": "main.domain", + "unique properties": [ + "domain_id" + ], + "properties": [ + { + "name": "domain_id", + "type": "table column", + "column name": "did", + "data type": "numeric", + "description": "Unique identifier for the academic domain or field.", + "sample values": [1, 3, 5], + "synonyms": ["identification", "domain key"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the academic domain", + "sample 
values": ["Data Science", "Computer Science"], + "synonyms": ["field name", "subject"] + } + ], + "description": "Records of different academic domains or areas of study", + "synonyms": ["field", "subject"] + }, + { + "name": "domains_authors", + "type": "simple table", + "table path": "main.domain_author", + "unique properties": [ + ["author_id", + "domain_id"] + ], + "properties": [ + { + "name": "author_id", + "type": "table column", + "column name": "aid", + "data type": "numeric", + "description": "The id of the author related to this domain", + "sample values": [1, 3, 5], + "synonyms": ["author key" ] + }, + { + "name": "domain_id", + "type": "table column", + "column name": "did", + "data type": "numeric", + "description": "The id of the domain related to the author", + "sample values": [1, 3, 5], + "synonyms": ["domain key"] + } + ], + "description": "The records of each author being associated with domains", + "synonyms": ["author field", "author subject"] + }, + { + "name": "domains_conferences", + "type": "simple table", + "table path": "main.domain_conference", + "unique properties": [ + ["conference_id", + "domain_id"] + ], + "properties": [ + { + "name": "conference_id", + "type": "table column", + "column name": "cid", + "data type": "numeric", + "description": "The id for the conference related to this domain", + "sample values": [1, 2, 3], + "synonyms": ["conference key"] + }, + { + "name": "domain_id", + "type": "table column", + "column name": "did", + "data type": "numeric", + "description": "The id for the domain related to the conference.", + "sample values": [2, 4, 5], + "synonyms": ["domain key"] + } + ], + "description": "Describes the domains related to the different conferences", + "synonyms": ["conference subject"] + }, + { + "name": "domains_journals", + "type": "simple table", + "table path": "main.domain_journal", + "unique properties": [ + ["domain_id", + "journal_id"] + ], + "properties": [ + { + "name": "domain_id", + "type": 
"table column", + "column name": "did", + "data type": "numeric", + "description": "The id of the domain realated to this journal", + "sample values": [1,2,5], + "synonyms": ["domain key"] + }, + { + "name": "journal_id", + "type": "table column", + "column name": "jid", + "data type": "numeric", + "description": "The id of the jornal that published about this domain", + "sample values": [2, 3, 4], + "synonyms": ["journal id", "journal key"] + } + ], + "description": "Records of domains being used by a journal", + "synonyms": ["journal domain", "journal field"] + }, + { + "name": "domains_keywords", + "type": "simple table", + "table path": "main.domain_keyword", + "unique properties": [ + ["domain_id", + "keyword_id"] + ], + "properties": [ + { + "name": "domain_id", + "type": "table column", + "column name": "did", + "data type": "numeric", + "description": "The id of the domain which is related to this keyword", + "sample values": [1, 2], + "synonyms": ["domain key"] + }, + { + "name": "keyword_id", + "type": "table column", + "column name": "kid", + "data type": "numeric", + "description": "The id of the keyword used or connected to this domain or field", + "sample values": [2, 3], + "synonyms": ["keyword key"] + } + ], + "description": "Contains records of keywords that is related to a domain", + "synonyms": ["keyword domain"] + }, + { + "name": "domains_publications", + "type": "simple table", + "table path": "main.domain_publication", + "unique properties": [["domain_id", "publication_id"]], + "properties": [ + { + "name": "domain_id", + "type": "table column", + "column name": "did", + "data type": "numeric", + "description": "The id of the domain related to this publication", + "sample values": [1, 2, 3], + "synonyms": ["domain key"] + }, + { + "name": "publication_id", + "type": "table column", + "column name": "pid", + "data type": "numeric", + "description": "The id of the publication about this domain", + "sample values": [2, 4, 5], + "synonyms": 
["publication key"] + } + ], + "description": "Records of the publication and its related domain or field", + "synonyms": ["publication field"] + }, + { + "name": "journals", + "type": "simple table", + "table path": "main.journal", + "unique properties": [ + "journal_id" + ], + "properties": [ + { + "name": "homepage", + "type": "table column", + "column name": "homepage", + "data type": "string", + "description": "The url of the journal", + "sample values": ["www.aijournal.com", "www.ml.com"], + "synonyms": ["website", "url"] + }, + { + "name": "journal_id", + "type": "table column", + "column name": "jid", + "data type": "numeric", + "description": "The id of the journal", + "sample values": [1, 2, 3], + "synonyms": ["journal id", "identification"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the journal", + "sample values": ["Nature", "Journal of Machine Learning Research"], + "synonyms": ["journal title", "journal name"] + } + ], + "description": "Contains details about the journals", + "synonyms": ["magazines", "newspapers"] + }, + { + "name": "keywords", + "type": "simple table", + "table path": "main.keyword", + "unique properties": [ + "keyword_id" + ], + "properties": [ + { + "name": "keyword", + "type": "table column", + "column name": "keyword", + "data type": "string", + "description": "The keyword itself", + "sample values": ["Neuroscience", "AI"], + "synonyms": ["topic", "subject"] + }, + { + "name": "keyword_id", + "type": "table column", + "column name": "kid", + "data type": "numeric", + "description": "The identifier of the keyword", + "sample values": [1, 2, 4], + "synonyms": ["keyword id"] + } + ], + "description": "Collection of keywords used in publications and domains", + "synonyms": ["keyword collection", "topics"] + }, + { + "name": "organizations", + "type": "simple table", + "table path": "main.organization", + "unique properties": [ +
"organization_id" + ], + "properties": [ + { + "name": "continent", + "type": "table column", + "column name": "continent", + "data type": "string", + "description": "The continent where the organization resides", + "sample values": ["North America", "Asia"], + "synonyms": ["location"] + }, + { + "name": "homepage", + "type": "table column", + "column name": "homepage", + "data type": "string", + "description": "The url of the organization's website", + "sample values": ["www.organization2.com", "www.epfl.com"], + "synonyms": ["website link"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the organization", + "sample values": ["École Polytechnique Fédérale de Lausanne 4", "Organization 2"], + "synonyms": ["oranization name"] + }, + { + "name": "organization_id", + "type": "table column", + "column name": "oid", + "data type": "numeric", + "description": "The identifier for the organization", + "sample values": [1, 3, 5], + "synonyms": ["id", "oid"] + } + ], + "description": "Records of the existing organizations, including continent, name, homepage and id", + "synonyms": ["institutions"] + }, + { + "name": "publications", + "type": "simple table", + "table path": "main.publication", + "unique properties": [ + "publication_id" + ], + "properties": [ + { + "name": "abstract", + "type": "table column", + "column name": "abstract", + "data type": "string", + "description": "Summary of the publication", + "sample values": ["Abstract 1", "Abstract 2"], + "synonyms": ["summary"] + }, + { + "name": "conference_id", + "type": "table column", + "column name": "cid", + "data type": "numeric", + "description": "The id of the conference related to this publication", + "sample values": [1, 2, 3], + "synonyms": ["presented at conference"] + }, + { + "name": "citation_num", + "type": "table column", + "column name": "citation_num", + "data type": "numeric", + "description": "The number of times 
this publication was cited", + "sample values": [4, 2, 0], + "synonyms": ["number of citations"] + }, + { + "name": "journal_id", + "type": "table column", + "column name": "jid", + "data type": "numeric", + "description": "The id of the journal that publish this publication", + "sample values": [1, 2, 4], + "synonyms": ["journal id", "journal key"] + }, + { + "name": "publication_id", + "type": "table column", + "column name": "pid", + "data type": "numeric", + "description": "The unique identifier for the publication", + "sample values": [1, 2, 5], + "synonyms": ["publication id"] + }, + { + "name": "reference_num", + "type": "table column", + "column name": "reference_num", + "data type": "numeric", + "description": "The total number of external references made by this publication.", + "sample values": [0, 1, 4], + "synonyms": ["number of references"] + }, + { + "name": "title", + "type": "table column", + "column name": "title", + "data type": "string", + "description": "The title of the paper or article.", + "sample values": ["Attention is all you need", "Optimizing GPU Throughput"], + "synonyms": ["paper title", "article title"] + }, + { + "name": "year", + "type": "table column", + "column name": "year", + "data type": "numeric", + "description": "The year when the publication was published", + "sample values": [2021, 2020], + "synonyms": ["year of publication"] + } + ], + "description": "Stores core information about academic papers, articles, or other publications.", + "synonyms": ["articles", "papers"] + }, + { + "name": "publications_keywords", + "type": "simple table", + "table path": "main.publication_keyword", + "unique properties": [["publication_id", "keyword_id"]], + "properties": [ + { + "name": "publication_id", + "type": "table column", + "column name": "pid", + "data type": "numeric", + "description": "The id of the publication related to the keyword", + "sample values": [1, 2], + "synonyms": ["publication key"] + }, + { + "name": "keyword_id", 
+ "type": "table column", + "column name": "kid", + "data type": "numeric", + "description": "The id of the keyword used in the publication", + "sample values": [2, 3], + "synonyms": ["keyword id"] + } + ], + "description": "Records of keyword used in publication", + "synonyms": ["keyword usage"] + }, + { + "name": "writes", + "type": "simple table", + "table path": "main.writes", + "unique properties": [["author_id", "publication_id"]], + "properties": [ + { + "name": "author_id", + "type": "table column", + "column name": "aid", + "data type": "numeric", + "description": "The author or co-author of the publication", + "sample values": [1, 2, 3], + "synonyms": ["author", "co-author"] + }, + { + "name": "publication_id", + "type": "table column", + "column name": "pid", + "data type": "numeric", + "description": "The publication related to the author", + "sample values": [1, 2, 3], + "synonyms": ["publication key"] + } + ], + "description": "Records of authors and its publications", + "synonyms": ["publication authors", "authorship"] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "authors", + "parent collection": "organizations", + "child collection": "authors", + "singular": false, + "always matches": false, + "keys": { + "organization_id": [ + "organization_id" + ] + }, + "description": "All authors that belongs to this organization", + "synonyms": ["authors of the organization"] + }, + { + "type": "reverse", + "name": "organization", + "original parent": "organizations", + "original property": "authors", + "singular": true, + "always matches": false, + "description": "The organization that the author belongs to", + "synonyms": ["organization of the author"] + }, + { + "type": "simple join", + "name": "references", + "parent collection": "publications", + "child collection": "citations", + "singular": true, + "always matches": false, + "keys": { + "publication_id": [ + "citing" + ] + }, + "description": "All the citations this publication 
contains", + "synonyms": ["references made", "outgoing citations", "cited works"] + }, + { + "type": "reverse", + "name": "publication_citing", + "original parent": "publications", + "original property": "references", + "singular": true, + "always matches": true, + "description": "The publication citing", + "synonyms": ["citing publication", "source publication"] + }, + { + "type": "simple join", + "name": "cited_by", + "parent collection": "publications", + "child collection": "citations", + "singular": false, + "always matches": false, + "keys": { + "publication_id": [ + "cited" + ] + }, + "description": "All citations of the publication", + "synonyms": ["incoming citations", "referenced by", "cited by publications"] + }, + { + "type": "reverse", + "name": "publication_cited", + "original parent": "publications", + "original property": "cited_by", + "singular": true, + "always matches": true, + "description": "The publication cited", + "synonyms": ["referenced publication", "cited work"] + }, + { + "type": "simple join", + "name": "domain_authors", + "parent collection": "domains", + "child collection": "domains_authors", + "singular": false, + "always matches": false, + "keys": { + "domain_id": [ + "domain_id" + ] + }, + "description": "The id of the authors related to the domain", + "synonyms": ["authors in domain", "domain's authors"] + }, + { + "type": "reverse", + "name": "domain", + "original parent": "domains", + "original property": "domain_authors", + "singular": true, + "always matches": true, + "description": "The domain related to the author id", + "synonyms": ["author's domain", "field of author"] + }, + { + "type": "simple join", + "name": "author_domains", + "parent collection": "authors", + "child collection": "domains_authors", + "singular": false, + "always matches": false, + "keys": { + "author_id": [ + "author_id" + ] + }, + "description": "The domains id for this author", + "synonyms": ["fields of author", "author's domains"] + }, + { + 
"type": "reverse", + "name": "author", + "original parent": "authors", + "original property": "author_domains", + "singular": true, + "always matches": true, + "description": "The author related to this domain_author", + "synonyms": ["domain's author", "field researcher"] + }, + { + "type": "simple join", + "name": "domain_conferences", + "parent collection": "domains", + "child collection": "domains_conferences", + "singular": false, + "always matches": true, + "keys": { + "domain_id": [ + "domain_id" + ] + }, + "description": "The domain_conferences related to the domain", + "synonyms": ["conferences in domain", "domain's conferences"] + }, + { + "type": "reverse", + "name": "domain", + "original parent": "domains", + "original property": "domain_conferences", + "singular": true, + "always matches": true, + "description": "The domain related to the domain_conference", + "synonyms": ["conference's domain", "field of conference"] + }, + { + "type": "simple join", + "name": "conference_domains", + "parent collection": "conferences", + "child collection": "domains_conferences", + "singular": false, + "always matches": false, + "keys": { + "conference_id": [ + "conference_id" + ] + }, + "description": "The domain_conference related to the conference", + "synonyms": ["conference fields", "conference domains"] + }, + { + "type": "reverse", + "name": "conference", + "original parent": "conferences", + "original property": "conference_domains", + "singular": true, + "always matches": true, + "description": "The conference related to the domain_conference", + "synonyms": ["domain's conference", "field conference"] + }, + { + "type": "simple join", + "name": "journal_domains", + "parent collection": "journals", + "child collection": "domains_journals", + "singular": false, + "always matches": false, + "keys": { + "journal_id": [ + "journal_id" + ] + }, + "description": "The domains_journal related to the journal", + "synonyms": ["journal fields", "journal domains"] + }, + { 
+ "type": "reverse", + "name": "journal", + "original parent": "journals", + "original property": "journal_domains", + "singular": true, + "always matches": true, + "description": "The journal related to the domain_journal", + "synonyms": ["domain's journal", "field journal"] + }, + { + "type": "simple join", + "name": "domain_journals", + "parent collection": "domains", + "child collection": "domains_journals", + "singular": false, + "always matches": false, + "keys": { + "domain_id": [ + "domain_id" + ] + }, + "description": "The domain_journal related to the domain", + "synonyms": ["journals in domain", "domain's journals"] + }, + { + "type": "reverse", + "name": "domain", + "original parent": "domains", + "original property": "domain_journals", + "singular": true, + "always matches": true, + "description": "The domain linked with the domain_journal", + "synonyms": ["journal's domain", "field of journal"] + }, + { + "type": "simple join", + "name": "keyword_domains", + "parent collection": "keywords", + "child collection": "domains_keywords", + "singular": false, + "always matches": false, + "keys": { + "keyword_id": [ + "keyword_id" + ] + }, + "description": "The domain_keywords related to the keyword", + "synonyms": ["keyword fields", "domains for keyword"] + }, + { + "type": "reverse", + "name": "keyword", + "original parent": "keywords", + "original property": "keyword_domains", + "singular": true, + "always matches": true, + "description": "The keyword related to the domain_keyword", + "synonyms": ["domain's keyword", "field keyword"] + }, + { + "type": "simple join", + "name": "domain_keywords", + "parent collection": "domains", + "child collection": "domains_keywords", + "singular": false, + "always matches": false, + "keys": { + "domain_id": [ + "domain_id" + ] + }, + "description": "The domain_keywords related to the domain", + "synonyms": ["keywords in domain", "domain's keywords"] + }, + { + "type": "reverse", + "name": "domain", + "original parent": 
"domains", + "original property": "domain_keywords", + "singular": true, + "always matches": true, + "description": "The keyword linked to the domain_keyword", + "synonyms": ["keyword's domain", "field for keyword"] + }, + { + "type": "simple join", + "name": "publication_domains", + "parent collection": "publications", + "child collection": "domains_publications", + "singular": false, + "always matches": false, + "keys": { + "publication_id": [ + "publication_id" + ] + }, + "description": "The domain_publications related to the publication", + "synonyms": ["fields of publication", "publication domains"] + }, + { + "type": "reverse", + "name": "publication", + "original parent": "publications", + "original property": "publication_domains", + "singular": true, + "always matches": true, + "description": "The publication related to the domain_publication", + "synonyms": ["domain's publication", "field publication"] + }, + { + "type": "simple join", + "name": "domain_publications", + "parent collection": "domains", + "child collection": "domains_publications", + "singular": false, + "always matches": false, + "keys": { + "domain_id": [ + "domain_id" + ] + }, + "description": "The domain_publications related to the domain", + "synonyms": ["publications in domain", "domain's publications"] + }, + { + "type": "reverse", + "name": "domain", + "original parent": "domains", + "original property": "domain_publications", + "singular": true, + "always matches": true, + "description": "The domain related to the domain_publication", + "synonyms": ["publication's domain", "field for publication"] + }, + { + "type": "simple join", + "name": "archives", + "parent collection": "journals", + "child collection": "publications", + "singular": false, + "always matches": false, + "keys": { + "journal_id": [ + "journal_id" + ] + }, + "description": "The publications related to the journal", + "synonyms": ["journal publications", "journal articles"] + }, + { + "type": "reverse", + "name": 
"publisher", + "original parent": "journals", + "original property": "archives", + "singular": true, + "always matches": true, + "description": "The journal the published the publication", + "synonyms": ["publication's journal", "article publisher"] + }, + { + "type": "simple join", + "name": "proceedings", + "parent collection": "conferences", + "child collection": "publications", + "singular": false, + "always matches": false, + "keys": { + "conference_id": [ + "conference_id" + ] + }, + "description": "The publications presented in the conference", + "synonyms": ["conference papers", "conference proceedings"] + }, + { + "type": "reverse", + "name": "conference", + "original parent": "conferences", + "original property": "proceedings", + "singular": true, + "always matches": false, + "description": "The conference where the publication was presented", + "synonyms": ["publication's conference", "presenting conference"] + }, + { + "type": "simple join", + "name": "keyword_publications", + "parent collection": "keywords", + "child collection": "publications_keywords", + "singular": false, + "always matches": false, + "keys": { + "keyword_id": [ + "keyword_id" + ] + }, + "description": "All publication_keywords related to the keyword", + "synonyms": ["publications with keyword", "keyword usage in publications"] + }, + { + "type": "reverse", + "name": "keyword", + "original parent": "keywords", + "original property": "keyword_publications", + "singular": true, + "always matches": true, + "description": "The keyword linked to the publication_keyword", + "synonyms": ["publication's keyword", "used keyword"] + }, + { + "type": "simple join", + "name": "publication_keywords", + "parent collection": "publications", + "child collection": "publications_keywords", + "singular": false, + "always matches": false, + "keys": { + "publication_id": [ + "publication_id" + ] + }, + "description": "All publication_keywords related to the publication", + "synonyms": ["keywords in 
publication", "publication's keywords"] + }, + { + "type": "reverse", + "name": "publication", + "original parent": "publications", + "original property": "publication_keywords", + "singular": true, + "always matches": true, + "description": "The publication linked to the publication_keyword", + "synonyms": ["keyword's publication", "keyworded publication"] + }, + { + "type": "simple join", + "name": "publication_authors", + "parent collection": "publications", + "child collection": "writes", + "singular": false, + "always matches": false, + "keys": { + "publication_id": [ + "publication_id" + ] + }, + "description": "All authors id related to the publication", + "synonyms": ["authors of publication", "publication's authors"] + }, + { + "type": "reverse", + "name": "publication", + "original parent": "publications", + "original property": "publication_authors", + "singular": true, + "always matches": true, + "description": "The publication linked to the writes", + "synonyms": ["author's publication", "written publication"] + }, + { + "type": "simple join", + "name": "author_publications", + "parent collection": "authors", + "child collection": "writes", + "singular": false, + "always matches": false, + "keys": { + "author_id": [ + "author_id" + ] + }, + "description": "The publications' id of the write related to the author", + "synonyms": ["publications by author", "author's works"] + }, + { + "type": "reverse", + "name": "author", + "original parent": "authors", + "original property": "author_publications", + "singular": true, + "always matches": true, + "description": "The author related to the written", + "synonyms": ["publication's author", "work's author"] + } + ] + }, + { + "name": "Restaurants", + "version": "V2", + "collections": [ + { + "name": "cities", + "type": "simple table", + "table path": "main.geographic", + "unique properties": [ + "city_name" + ], + "properties": [ + { + "name": "city_name", + "type": "table column", + "column name": 
"city_name", + "data type": "string", + "description": "The name of the city", + "sample values": ["Los Angeles", "Miami"], + "synonyms": ["city"] + }, + { + "name": "county", + "type": "table column", + "column name": "county", + "data type": "string", + "description": "The name of the county", + "sample values": ["Miami-Dade", "Cook", "New York"], + "synonyms": ["division"] + }, + { + "name": "region", + "type": "table column", + "column name": "region", + "data type": "string", + "description": "The name of the region", + "sample values": ["California", "New York"], + "synonyms": ["territory"] + } + ], + "description": "Contains records of cities locations including its name, county and region", + "synonyms": ["locations", "places"] + }, + { + "name": "locations", + "type": "simple table", + "table path": "main.location", + "unique properties": [ + ["restaurant_id", "house_number", "street_name","city_name"] + ], + "properties": [ + { + "name": "restaurant_id", + "type": "table column", + "column name": "restaurant_id", + "data type": "numeric", + "description": "Unique identifier for each restaurant", + "sample values": [1, 2, 3], + "synonyms": ["restaurant_id", "diner_id"] + }, + { + "name": "house_number", + "type": "table column", + "column name": "house_number", + "data type": "numeric", + "description": "The number assigned to the building where the restaurant is located", + "sample values": [123, 789, 12], + "synonyms": ["street_number"] + }, + { + "name": "street_name", + "type": "table column", + "column name": "street_name", + "data type": "string", + "description": "The name of the street where the restaurant is located", + "sample values": ["Main St", "Oak St", "Pine Ave"], + "synonyms": ["avenue"] + }, + { + "name": "city_name", + "type": "table column", + "column name": "city_name", + "data type": "string", + "description": "The name of the city where the restaurant is located", + "sample values": ["New York", "Los Angeles", "Miami"], + "synonyms": 
[] + } + ], + "description": "Contains the location of each restaurant", + "synonyms": ["address"] + }, + { + "name": "restaurants", + "type": "simple table", + "table path": "main.restaurant", + "unique properties": [ + "id_" + ], + "properties": [ + { + "name": "id_", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each restaurant", + "sample values": [1, 2, 3], + "synonyms": ["identifier"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the restaurant", + "sample values": ["The Pasta House", "The Burger Joint", "The Seafood Shack"], + "synonyms": ["restaurant"] + }, + { + "name": "food_type", + "type": "table column", + "column name": "food_type", + "data type": "string", + "description": "The type of food served at the restaurant", + "sample values": ["Seafood", "American", "Japanese"], + "synonyms": ["specialty", "menu type"] + }, + { + "name": "city_name", + "type": "table column", + "column name": "city_name", + "data type": "string", + "description": "The city where the restaurant is located", + "sample values": ["San Francisco", "New York", "Miami"], + "synonyms": ["locality", "town"] + }, + { + "name": "rating", + "type": "table column", + "column name": "rating", + "data type": "numeric", + "description": "The rating of the restaurant on a scale of 0 to 5", + "sample values": [4.2, 3.9, 4.5], + "synonyms": ["score", "review"] + } + ], + "description": "Contains the information of the restaurants", + "synonyms": ["diner"] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "restaurant_locations", + "parent collection": "cities", + "child collection": "locations", + "singular": false, + "always matches": false, + "keys": { + "city_name": [ + "city_name" + ] + }, + "description": "All restaurant locations within the city", + "synonyms": ["restaurant locations"] + }, + { + "type": "reverse", 
+ "name": "city", + "original parent": "cities", + "original property": "restaurant_locations", + "singular": true, + "always matches": false, + "description": "The geographic information for the city that the location belongs to", + "synonyms": ["geography", "town"] + }, + { + "type": "simple join", + "name": "location", + "parent collection": "restaurants", + "child collection": "locations", + "singular": true, + "always matches": true, + "keys": { + "id_": [ + "restaurant_id" + ] + }, + "description": "The location of this restaurant", + "synonyms": ["place", "diner"] + }, + { + "type": "reverse", + "name": "restaurant", + "original parent": "restaurants", + "original property": "location", + "singular": true, + "always matches": true, + "description": "The restaurant related to this location", + "synonyms": ["franchise", "chain"] + }, + { + "type": "simple join", + "name": "restaurants", + "parent collection": "cities", + "child collection": "restaurants", + "singular": false, + "always matches": false, + "keys": { + "city_name": [ + "city_name" + ] + }, + "description": "The restaurants located in this geographic territory", + "synonyms": ["diners"] + }, + { + "type": "reverse", + "name": "city", + "original parent": "cities", + "original property": "restaurants", + "singular": true, + "always matches": true, + "description": "The geographic territory related to this restaurant", + "synonyms": ["location", "territory"] + } + ] + } +] diff --git a/tests/test_pipeline_trino.py b/tests/test_pipeline_trino.py new file mode 100644 index 000000000..5fbd88f2b --- /dev/null +++ b/tests/test_pipeline_trino.py @@ -0,0 +1,164 @@ +""" +Integration tests for the PyDough workflow on various queries using Trino. 
+""" + + # ruff: noqa + # mypy: ignore-errors + # ruff & mypy should not try to typecheck or verify any of this + + import trino + import pytest + from pydough.configs import PyDoughConfigs + from pydough.database_connectors import DatabaseContext + from tests.test_pydough_functions.tpch_outputs import ( + tpch_q16_output, + ) + from tests.test_pydough_functions.tpch_test_functions import ( + impl_tpch_q16, + ) + + from tests.test_pydough_functions.simple_pydough_functions import week_offset + + from tests.testing_utilities import ( + graph_fetcher, + PyDoughSQLComparisonTest, + ) + + from .conftest import tpch_custom_test_data_dialect_replacements + + from .test_pipeline_defog_custom import defog_custom_pipeline_test_data + from .test_pipeline_defog import defog_pipeline_test_data + from .test_pipeline_custom_datasets import custom_datasets_test_data # noqa + from .test_pipeline_tpch_custom import tpch_custom_pipeline_test_data # noqa + + from .testing_utilities import PyDoughPandasTest + + + @pytest.mark.trino + @pytest.mark.execute + def test_pipeline_e2e_tpch_trino_conn( + tpch_pipeline_test_data: PyDoughPandasTest, + get_trino_graphs: graph_fetcher, + trino_conn_db_context: DatabaseContext, + ): + """ + Test executing the TPC-H queries from the original code generation, + with Trino as the executing database. + Using the `connection` as keyword argument to the DatabaseContext. + """ + tpch_pipeline_test_data.run_e2e_test( + get_trino_graphs, + trino_conn_db_context, + coerce_types=True, + ) + + + @pytest.mark.trino + @pytest.mark.execute + def test_pipeline_e2e_tpch_trino_params( + get_trino_graphs: graph_fetcher, + trino_params_tpch_db_context: DatabaseContext, + ): + """ + Test executing the TPC-H queries from the original code generation, + with Trino as the executing database. Using `host`, `port`, + `user`, `catalog`, `schema`, and `warehouse` as keyword arguments to the + DatabaseContext.
Only tests using TPC-H query 16, since the rest of the + tests are already covered with the trino connection test. + """ + test_data: PyDoughPandasTest = PyDoughPandasTest( + impl_tpch_q16, + "TPCH", + tpch_q16_output, + "tpch_q16_params", + ) + test_data.run_e2e_test( + get_trino_graphs, trino_params_tpch_db_context, coerce_types=True + ) + + + @pytest.mark.trino + @pytest.mark.execute + def test_pipeline_e2e_trino_tpch_custom( + tpch_custom_pipeline_test_data: PyDoughPandasTest, # noqa: F811 + get_trino_graphs: graph_fetcher, + trino_conn_db_context: DatabaseContext, + ): + """ + Test executing the TPC-H custom queries from the original code generation on + Trino. + """ + tpch_custom_pipeline_test_data = tpch_custom_test_data_dialect_replacements( + trino_conn_db_context.dialect, + tpch_custom_pipeline_test_data, + ) + + tpch_custom_pipeline_test_data.run_e2e_test( + get_trino_graphs, + trino_conn_db_context, + coerce_types=True, + ) + + + @pytest.mark.trino + @pytest.mark.execute + def test_defog_e2e( + defog_pipeline_test_data: PyDoughSQLComparisonTest, + get_trino_graphs: graph_fetcher, + trino_conn_db_context: DatabaseContext, + defog_config: PyDoughConfigs, + sqlite_defog_connection: DatabaseContext, + ) -> None: + """ + Test executing the defog analytical questions on the Trino database, + comparing against the result of running the reference SQL query text on the + SQLite reference database. Run on the defog.ai queries. + NOTE: passing SQLite connection as reference database so that refsol + is executed using SQLite. + This is needed because refsol uses SQLite SQL syntax to obtain + the correct results.
+ """ + defog_pipeline_test_data.run_e2e_test( + get_trino_graphs, + trino_conn_db_context, + defog_config, + reference_database=sqlite_defog_connection, + coerce_types=True, + ) + + + @pytest.mark.trino + @pytest.mark.execute + def test_pipeline_trino_e2e_defog_custom( + defog_custom_pipeline_test_data: PyDoughPandasTest, + get_trino_graphs: graph_fetcher, + defog_config: PyDoughConfigs, + trino_conn_db_context: DatabaseContext, + ): + """ + Test executing the defog analytical queries with the Trino database. + """ + defog_custom_pipeline_test_data.run_e2e_test( + get_trino_graphs, + trino_conn_db_context, + config=defog_config, + coerce_types=True, + ) + + + @pytest.mark.trino + @pytest.mark.execute + def test_pipeline_e2e_trino_custom_datasets( + custom_datasets_test_data: PyDoughPandasTest, # noqa: F811 + get_custom_datasets_graph: graph_fetcher, + trino_conn_db_context: DatabaseContext, + ): + """ + Test executing the custom queries with the custom datasets against the + refsol DataFrame. + """ + custom_datasets_test_data.run_e2e_test( + get_custom_datasets_graph, + trino_conn_db_context, + coerce_types=True, + ) diff --git a/tests/test_sql_refsols/agg_partition_trino.sql b/tests/test_sql_refsols/agg_partition_trino.sql new file mode 100644 index 000000000..69276ccdd --- /dev/null +++ b/tests/test_sql_refsols/agg_partition_trino.sql @@ -0,0 +1,10 @@ +WITH _t0 AS ( + SELECT + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + YEAR(CAST(o_orderdate AS TIMESTAMP)) +) +SELECT + MAX(n_rows) AS best_year +FROM _t0 diff --git a/tests/test_sql_refsols/agg_simplification_1_trino.sql b/tests/test_sql_refsols/agg_simplification_1_trino.sql new file mode 100644 index 000000000..9264d86e7 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_1_trino.sql @@ -0,0 +1,237 @@ +WITH _t1 AS ( + SELECT + sbtickerexchange, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(1) OVER
(PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN 1 + ELSE NULL + END AS expr_72, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(2) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN 2 + ELSE NULL + END AS expr_73, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(-1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN -1 + ELSE NULL + END AS expr_74, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(-3) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN -3 + ELSE NULL + END AS expr_75, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(0) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN 0 + ELSE NULL + END AS expr_76, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') - 1.0 + ) - ( + CAST(( + COUNT(0.5) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN 0.5 + ELSE NULL + END AS expr_77, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) - 1.0 + ) - ( + CAST(( + COUNT(LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN 
LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) + ELSE NULL + END AS expr_79, + CASE + WHEN FLOOR(0.9 * COUNT(1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')))) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN 1 + ELSE NULL + END AS expr_80, + CASE + WHEN FLOOR(0.8 * COUNT(2) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')))) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN 2 + ELSE NULL + END AS expr_81, + CASE + WHEN FLOOR( + 0.7 * COUNT(-1) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) + ) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN -1 + ELSE NULL + END AS expr_82, + CASE + WHEN FLOOR( + 0.6 * COUNT(-3) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) + ) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN -3 + ELSE NULL + END AS expr_83, + CASE + WHEN FLOOR(0.5 * COUNT(0) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')))) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN 0 + ELSE NULL + END AS expr_84, + CASE + WHEN FLOOR( + 0.4 * COUNT(0.5) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) + ) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY '1') + THEN 0.5 + ELSE NULL + END AS expr_85, + CASE + WHEN FLOOR( + 0.2 * COUNT(LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca'))) + ) < ROW_NUMBER() OVER (PARTITION BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) ORDER BY LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) DESC) + THEN LENGTH(NULLIF(sbtickerexchange, 'NYSE Arca')) + ELSE NULL + END AS expr_87 + FROM main.sbticker +), _t0 AS ( + SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN 
sbtickerexchange ELSE NULL END) AS aug_exchange, + AVG(expr_72) AS avg_expr_72, + AVG(expr_73) AS avg_expr_73, + AVG(expr_74) AS avg_expr_74, + AVG(expr_75) AS avg_expr_75, + AVG(expr_76) AS avg_expr_76, + AVG(expr_77) AS avg_expr_77, + AVG(expr_79) AS avg_expr_79, + MAX(expr_80) AS max_expr_80, + MAX(expr_81) AS max_expr_81, + MAX(expr_82) AS max_expr_82, + MAX(expr_83) AS max_expr_83, + MAX(expr_84) AS max_expr_84, + MAX(expr_85) AS max_expr_85, + MAX(expr_87) AS max_expr_87, + COUNT(*) AS n_rows + FROM _t1 + GROUP BY + 1 +) +SELECT + aug_exchange, + n_rows AS su1, + n_rows * 2 AS su2, + n_rows * -1 AS su3, + n_rows * -3 AS su4, + 0 AS su5, + n_rows * 0.5 AS su6, + 0 AS su7, + COALESCE(aug_exchange, 0) AS su8, + n_rows AS co1, + n_rows AS co2, + n_rows AS co3, + n_rows AS co4, + n_rows AS co5, + n_rows AS co6, + 0 AS co7, + n_rows * IF(NOT aug_exchange IS NULL, 1, 0) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT aug_exchange IS NULL AS BIGINT) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + aug_exchange AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + aug_exchange AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + aug_exchange AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + aug_exchange AS an8, + avg_expr_72 AS me1, + avg_expr_73 AS me2, + avg_expr_74 AS me3, + avg_expr_75 AS me4, + avg_expr_76 AS me5, + avg_expr_77 AS me6, + NULL AS me7, + avg_expr_79 AS me8, + max_expr_80 AS qu1, + max_expr_81 AS qu2, + max_expr_82 AS qu3, + max_expr_83 AS qu4, + max_expr_84 AS qu5, + max_expr_85 AS qu6, + NULL AS qu7, + max_expr_87 AS qu8 +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/agg_simplification_2_trino.sql b/tests/test_sql_refsols/agg_simplification_2_trino.sql new file mode 100644 
index 000000000..09e1bffec --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_trino.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COUNT(*) AS a2, + COUNT(CASE WHEN STARTS_WITH(LOWER(sbcustname), 'j') THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + ARBITRARY(LOWER(sbcuststate)) AS a7, + ARBITRARY(LOWER(sbcuststate)) AS a8, + ARBITRARY(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/aggregation_analytics_1_trino.sql b/tests/test_sql_refsols/aggregation_analytics_1_trino.sql new file mode 100644 index 000000000..bc8478236 --- /dev/null +++ b/tests/test_sql_refsols/aggregation_analytics_1_trino.sql @@ -0,0 +1,45 @@ +WITH _t1 AS ( + SELECT + s_name, + s_suppkey + FROM tpch.supplier + WHERE + s_name = 'Supplier#000009450' +), _s11 AS ( + SELECT + partsupp.ps_partkey, + partsupp.ps_suppkey, + SUM( + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) * ( + 1 - lineitem.l_tax + ) - lineitem.l_quantity * partsupp.ps_supplycost + ) AS sum_revenue + FROM tpch.partsupp AS partsupp + JOIN _t1 AS _t4 + ON _t4.s_suppkey = partsupp.ps_suppkey + JOIN tpch.part AS part + ON STARTS_WITH(part.p_container, 'LG') AND part.p_partkey = partsupp.ps_partkey + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) IN (1995, 1996) + AND lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + 1, + 2 +) +SELECT + part.p_name AS part_name, + ROUND(COALESCE(_s11.sum_revenue, 0), 2) AS revenue_generated +FROM tpch.partsupp AS partsupp +JOIN _t1 AS _t1 + ON _t1.s_suppkey = partsupp.ps_suppkey +JOIN tpch.part AS part + ON STARTS_WITH(part.p_container, 'LG') AND part.p_partkey = partsupp.ps_partkey +LEFT JOIN _s11 AS _s11 + ON _s11.ps_partkey = partsupp.ps_partkey AND _s11.ps_suppkey = 
partsupp.ps_suppkey +ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 8 diff --git a/tests/test_sql_refsols/aggregation_analytics_2_trino.sql b/tests/test_sql_refsols/aggregation_analytics_2_trino.sql new file mode 100644 index 000000000..b1175910e --- /dev/null +++ b/tests/test_sql_refsols/aggregation_analytics_2_trino.sql @@ -0,0 +1,34 @@ +WITH _s6 AS ( + SELECT + partsupp.ps_partkey, + SUM( + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) * ( + 1 - lineitem.l_tax + ) - lineitem.l_quantity * partsupp.ps_supplycost + ) AS sum_revenue + FROM tpch.partsupp AS partsupp + JOIN tpch.supplier AS supplier + ON partsupp.ps_suppkey = supplier.s_suppkey + AND supplier.s_name = 'Supplier#000000182' + JOIN tpch.part AS part + ON STARTS_WITH(part.p_container, 'SM') AND part.p_partkey = partsupp.ps_partkey + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) IN (1995, 1996) + AND lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + partsupp.ps_suppkey, + 1 +) +SELECT + part.p_name AS part_name, + ROUND(COALESCE(_s6.sum_revenue, 0), 2) AS revenue_generated +FROM _s6 AS _s6 +JOIN tpch.part AS part + ON _s6.ps_partkey = part.p_partkey +ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 4 diff --git a/tests/test_sql_refsols/aggregation_analytics_3_trino.sql b/tests/test_sql_refsols/aggregation_analytics_3_trino.sql new file mode 100644 index 000000000..74804a6cb --- /dev/null +++ b/tests/test_sql_refsols/aggregation_analytics_3_trino.sql @@ -0,0 +1,35 @@ +WITH _s6 AS ( + SELECT + partsupp.ps_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity, + SUM( + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) * ( + 1 - lineitem.l_tax + ) - lineitem.l_quantity * partsupp.ps_supplycost + ) AS sum_revenue + FROM tpch.partsupp AS partsupp + JOIN tpch.supplier AS supplier + ON partsupp.ps_suppkey = supplier.s_suppkey + AND supplier.s_name = 'Supplier#000000182' + JOIN tpch.part AS part + ON 
STARTS_WITH(part.p_container, 'MED') AND part.p_partkey = partsupp.ps_partkey + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1994 + AND lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey + GROUP BY + partsupp.ps_suppkey, + 1 +) +SELECT + part.p_name AS part_name, + ROUND(CAST(COALESCE(_s6.sum_revenue, 0) AS DOUBLE) / COALESCE(_s6.sum_l_quantity, 0), 2) AS revenue_ratio +FROM _s6 AS _s6 +JOIN tpch.part AS part + ON _s6.ps_partkey = part.p_partkey +ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/aggregation_functions_trino.sql b/tests/test_sql_refsols/aggregation_functions_trino.sql new file mode 100644 index 000000000..506afddba --- /dev/null +++ b/tests/test_sql_refsols/aggregation_functions_trino.sql @@ -0,0 +1,70 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _t2 AS ( + SELECT + customer.c_acctbal, + customer.c_nationkey, + _s1.n_rows, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) - 1.0 + ) - ( + CAST(( + COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN customer.c_acctbal + ELSE NULL + END AS expr_17, + CASE + WHEN FLOOR(0.2 * COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) + THEN customer.c_acctbal + ELSE NULL + END AS expr_18 + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +), _t1 AS ( + SELECT + ARBITRARY(c_acctbal) AS anything_c_acctbal, + AVG(c_acctbal) AS avg_c_acctbal, + AVG(expr_17) AS avg_expr_17, + COUNT(c_acctbal) AS count_c_acctbal, + MAX(c_acctbal) AS max_c_acctbal, + MAX(expr_18) AS max_expr_18, + MIN(c_acctbal) AS min_c_acctbal, + COUNT(DISTINCT c_acctbal) AS ndistinct_c_acctbal, + 
STDDEV_POP(c_acctbal) AS population_std_c_acctbal, + VAR_POP(c_acctbal) AS population_var_c_acctbal, + STDDEV(c_acctbal) AS sample_std_c_acctbal, + VARIANCE(c_acctbal) AS sample_var_c_acctbal, + SUM(c_acctbal) AS sum_c_acctbal, + SUM(n_rows) AS sum_n_rows + FROM _t2 + GROUP BY + c_nationkey +) +SELECT + COALESCE(sum_c_acctbal, 0) AS sum_value, + avg_c_acctbal AS avg_value, + avg_expr_17 AS median_value, + min_c_acctbal AS min_value, + max_c_acctbal AS max_value, + max_expr_18 AS quantile_value, + anything_c_acctbal AS anything_value, + count_c_acctbal AS count_value, + ndistinct_c_acctbal AS count_distinct_value, + sample_var_c_acctbal AS variance_s_value, + population_var_c_acctbal AS variance_p_value, + sample_std_c_acctbal AS stddev_s_value, + population_std_c_acctbal AS stddev_p_value +FROM _t1 +WHERE + sum_n_rows = 0 OR sum_n_rows IS NULL diff --git a/tests/test_sql_refsols/alternative_quarter_cum_ir_analysis_trino.sql b/tests/test_sql_refsols/alternative_quarter_cum_ir_analysis_trino.sql new file mode 100644 index 000000000..b2a2d7d3c --- /dev/null +++ b/tests/test_sql_refsols/alternative_quarter_cum_ir_analysis_trino.sql @@ -0,0 +1,78 @@ +WITH _s0 AS ( + SELECT + ca_dt + FROM main.calendar +), _t2 AS ( + SELECT + pr_name, + pr_release + FROM main.products + WHERE + pr_name = 'RubyCopper-Star' +), _s12 AS ( + SELECT DISTINCT + DATE_TRUNC('QUARTER', CAST(_s0.ca_dt AS TIMESTAMP)) AS quarter + FROM _s0 AS _s0 + JOIN _t2 AS _t2 + ON _s0.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t2.pr_release AS TIMESTAMP))) + AND _s0.ca_dt >= _t2.pr_release +), _t5 AS ( + SELECT + pr_id, + pr_name + FROM main.products + WHERE + pr_name = 'RubyCopper-Star' +), _s9 AS ( + SELECT + countries.co_id, + _t5.pr_id + FROM _t5 AS _t5 + JOIN main.countries AS countries + ON countries.co_name = 'CN' +), _s13 AS ( + SELECT + DATE_TRUNC('QUARTER', CAST(_s2.ca_dt AS TIMESTAMP)) AS quarter, + COUNT(DISTINCT incidents.in_device_id) AS ndistinct_in_device_id + FROM _s0 AS _s2 + JOIN 
_t2 AS _t4 + ON _s2.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t4.pr_release AS TIMESTAMP))) + AND _s2.ca_dt >= _t4.pr_release + JOIN main.incidents AS incidents + ON _s2.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + JOIN _s9 AS _s9 + ON _s9.co_id = incidents.in_repair_country_id + JOIN main.devices AS devices + ON _s9.pr_id = devices.de_product_id AND devices.de_id = incidents.in_device_id + GROUP BY + 1 +), _s21 AS ( + SELECT + DATE_TRUNC('QUARTER', CAST(_s14.ca_dt AS TIMESTAMP)) AS quarter, + COUNT(*) AS n_rows + FROM _s0 AS _s14 + JOIN _t2 AS _t8 + ON _s14.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t8.pr_release AS TIMESTAMP))) + AND _s14.ca_dt >= _t8.pr_release + JOIN main.devices AS devices + ON _s14.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN _t5 AS _t9 + ON _t9.pr_id = devices.de_product_id + GROUP BY + 1 +) +SELECT + _s12.quarter, + COALESCE(_s13.ndistinct_in_device_id, 0) AS n_incidents, + COALESCE(_s21.n_rows, 0) AS n_sold, + ROUND( + CAST(SUM(COALESCE(_s13.ndistinct_in_device_id, 0)) OVER (ORDER BY _s12.quarter ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE) / SUM(COALESCE(_s21.n_rows, 0)) OVER (ORDER BY _s12.quarter ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS quarter_cum +FROM _s12 AS _s12 +LEFT JOIN _s13 AS _s13 + ON _s12.quarter = _s13.quarter +LEFT JOIN _s21 AS _s21 + ON _s12.quarter = _s21.quarter +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/arithmetic_and_binary_operators_trino.sql b/tests/test_sql_refsols/arithmetic_and_binary_operators_trino.sql new file mode 100644 index 000000000..55b545339 --- /dev/null +++ b/tests/test_sql_refsols/arithmetic_and_binary_operators_trino.sql @@ -0,0 +1,16 @@ +SELECT + CAST(( + lineitem.l_extendedprice * ( + 1 - ( + POWER(lineitem.l_discount, 2) + ) + ) + 1.0 + ) AS DOUBLE) / part.p_retailprice AS computed_value, + lineitem.l_quantity + lineitem.l_extendedprice AS total, + 
lineitem.l_extendedprice - lineitem.l_quantity AS delta, + lineitem.l_quantity * lineitem.l_discount AS product, + CAST(lineitem.l_extendedprice AS DOUBLE) / lineitem.l_quantity AS ratio, + POWER(lineitem.l_discount, 2) AS exponent +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey diff --git a/tests/test_sql_refsols/avg_acctbal_wo_debt_trino.sql b/tests/test_sql_refsols/avg_acctbal_wo_debt_trino.sql new file mode 100644 index 000000000..8f341c0e8 --- /dev/null +++ b/tests/test_sql_refsols/avg_acctbal_wo_debt_trino.sql @@ -0,0 +1,25 @@ +WITH _s1 AS ( + SELECT + c_nationkey, + SUM(GREATEST(c_acctbal, 0)) AS expr_0, + COUNT(GREATEST(c_acctbal, 0)) AS expr_1_0 + FROM tpch.customer + GROUP BY + 1 +), _s3 AS ( + SELECT + nation.n_regionkey, + SUM(_s1.expr_0) AS sum_expr, + SUM(_s1.expr_1_0) AS sum_expr_1 + FROM tpch.nation AS nation + JOIN _s1 AS _s1 + ON _s1.c_nationkey = nation.n_nationkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + CAST(_s3.sum_expr AS DOUBLE) / _s3.sum_expr_1 AS avg_bal_without_debt_erasure +FROM tpch.region AS region +JOIN _s3 AS _s3 + ON _s3.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/avg_gap_prev_urgent_same_clerk_trino.sql b/tests/test_sql_refsols/avg_gap_prev_urgent_same_clerk_trino.sql new file mode 100644 index 000000000..83ab6c7e2 --- /dev/null +++ b/tests/test_sql_refsols/avg_gap_prev_urgent_same_clerk_trino.sql @@ -0,0 +1,14 @@ +WITH _t0 AS ( + SELECT + DATE_DIFF( + 'DAY', + CAST(LAG(o_orderdate, 1) OVER (PARTITION BY o_clerk ORDER BY o_orderdate) AS TIMESTAMP), + CAST(o_orderdate AS TIMESTAMP) + ) AS delta + FROM tpch.orders + WHERE + o_orderpriority = '1-URGENT' +) +SELECT + AVG(delta) AS avg_delta +FROM _t0 diff --git a/tests/test_sql_refsols/avg_order_diff_per_customer_trino.sql b/tests/test_sql_refsols/avg_order_diff_per_customer_trino.sql new file mode 100644 index 000000000..8d74993fa --- /dev/null +++ 
b/tests/test_sql_refsols/avg_order_diff_per_customer_trino.sql @@ -0,0 +1,24 @@ +WITH _t1 AS ( + SELECT + customer.c_name, + orders.o_custkey, + DATE_DIFF( + 'DAY', + CAST(LAG(orders.o_orderdate, 1) OVER (PARTITION BY orders.o_custkey ORDER BY orders.o_orderdate) AS TIMESTAMP), + CAST(orders.o_orderdate AS TIMESTAMP) + ) AS day_diff + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'JAPAN' + JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_orderpriority = '1-URGENT' +) +SELECT + ARBITRARY(c_name) AS name, + AVG(day_diff) AS avg_diff +FROM _t1 +GROUP BY + o_custkey +ORDER BY + 2 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/bad_child_reuse_1_trino.sql b/tests/test_sql_refsols/bad_child_reuse_1_trino.sql new file mode 100644 index 000000000..8bc75a9f4 --- /dev/null +++ b/tests/test_sql_refsols/bad_child_reuse_1_trino.sql @@ -0,0 +1,27 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _t1 AS ( + SELECT + customer.c_custkey, + _s1.n_rows + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey + ORDER BY + customer.c_acctbal DESC, + COALESCE(_s1.n_rows, 0) NULLS FIRST + LIMIT 10 +) +SELECT + c_custkey AS cust_key, + n_rows AS n_orders +FROM _t1 +WHERE + n_rows <> 0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/bad_child_reuse_2_trino.sql b/tests/test_sql_refsols/bad_child_reuse_2_trino.sql new file mode 100644 index 000000000..551b95028 --- /dev/null +++ b/tests/test_sql_refsols/bad_child_reuse_2_trino.sql @@ -0,0 +1,27 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _t1 AS ( + SELECT + customer.c_acctbal, + customer.c_custkey, + _s1.n_rows, + COUNT(*) OVER (PARTITION BY customer.c_nationkey) AS n_cust + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +) 
+SELECT + c_custkey AS cust_key, + n_rows AS n_orders, + n_cust +FROM _t1 +WHERE + n_rows <> 0 +ORDER BY + c_acctbal DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/bad_child_reuse_3_trino.sql b/tests/test_sql_refsols/bad_child_reuse_3_trino.sql new file mode 100644 index 000000000..551b95028 --- /dev/null +++ b/tests/test_sql_refsols/bad_child_reuse_3_trino.sql @@ -0,0 +1,27 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _t1 AS ( + SELECT + customer.c_acctbal, + customer.c_custkey, + _s1.n_rows, + COUNT(*) OVER (PARTITION BY customer.c_nationkey) AS n_cust + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +) +SELECT + c_custkey AS cust_key, + n_rows AS n_orders, + n_cust +FROM _t1 +WHERE + n_rows <> 0 +ORDER BY + c_acctbal DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/bad_child_reuse_4_trino.sql b/tests/test_sql_refsols/bad_child_reuse_4_trino.sql new file mode 100644 index 000000000..683b8202c --- /dev/null +++ b/tests/test_sql_refsols/bad_child_reuse_4_trino.sql @@ -0,0 +1,26 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _t AS ( + SELECT + customer.c_acctbal, + customer.c_custkey, + _s1.n_rows, + AVG(CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE)) OVER (PARTITION BY customer.c_nationkey) AS _w + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +) +SELECT + c_custkey AS cust_key, + n_rows AS n_orders +FROM _t +WHERE + _w > COALESCE(n_rows, 0) AND n_rows <> 0 +ORDER BY + c_acctbal DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/bad_child_reuse_5_trino.sql b/tests/test_sql_refsols/bad_child_reuse_5_trino.sql new file mode 100644 index 000000000..bd396316f --- /dev/null +++ b/tests/test_sql_refsols/bad_child_reuse_5_trino.sql @@ -0,0 +1,41 @@ +WITH _t2 AS ( + SELECT + o_custkey + FROM tpch.orders +), _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM _t2 + GROUP 
BY + 1 +), _s2 AS ( + SELECT + customer.c_acctbal, + customer.c_custkey, + _s1.n_rows + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey + ORDER BY + 1 DESC, + 2 DESC + LIMIT 10 +), _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM _t2 + GROUP BY + 1 +) +SELECT + _s2.c_custkey AS cust_key, + COALESCE(_s2.n_rows, 0) AS n_orders +FROM _s2 AS _s2 +LEFT JOIN _u_0 AS _u_0 + ON _s2.c_custkey = _u_0._u_1 +WHERE + _u_0._u_1 IS NULL +ORDER BY + _s2.c_acctbal DESC, + 1 DESC diff --git a/tests/test_sql_refsols/casting_functions_trino.sql b/tests/test_sql_refsols/casting_functions_trino.sql new file mode 100644 index 000000000..8f6903759 --- /dev/null +++ b/tests/test_sql_refsols/casting_functions_trino.sql @@ -0,0 +1,6 @@ +SELECT + DATE_FORMAT(o_orderdate, '%Y-%m-%d') AS cast_to_string, + CAST(o_totalprice AS VARCHAR) AS cast_to_string2, + CAST(o_totalprice AS BIGINT) AS cast_to_integer, + CAST(o_shippriority AS DOUBLE) AS cast_to_float +FROM tpch.orders diff --git a/tests/test_sql_refsols/comparisons_and_logical_operators_trino.sql b/tests/test_sql_refsols/comparisons_and_logical_operators_trino.sql new file mode 100644 index 000000000..b29ed3f2e --- /dev/null +++ b/tests/test_sql_refsols/comparisons_and_logical_operators_trino.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +) +SELECT + customer.c_acctbal < 0 AS in_debt, + _s1.n_rows <= 12 OR _s1.n_rows IS NULL AS at_most_12_orders, + region.r_name = 'EUROPE' AS is_european, + nation.n_name <> 'GERMANY' AS non_german, + customer.c_acctbal > 0 AS non_empty_acct, + NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 AS at_least_5_orders, + region.r_name = 'ASIA' OR region.r_name = 'EUROPE' AS is_eurasian, + customer.c_acctbal < 0 AND region.r_name = 'EUROPE' AS is_european_in_debt +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = 
nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/conditional_functions_trino.sql b/tests/test_sql_refsols/conditional_functions_trino.sql new file mode 100644 index 000000000..08ff9ddaf --- /dev/null +++ b/tests/test_sql_refsols/conditional_functions_trino.sql @@ -0,0 +1,18 @@ +SELECT + IF(ARBITRARY(customer.c_acctbal) > 1000, 'High', 'Low') AS iff_col, + ARBITRARY(customer.c_name) IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(MIN(orders.o_totalprice), 0.0) AS default_val, + NOT MIN(orders.o_totalprice) IS NULL AS has_acct_bal, + MIN(orders.o_totalprice) IS NULL AS no_acct_bal, + CASE + WHEN ARBITRARY(customer.c_acctbal) > 0 + THEN ARBITRARY(customer.c_acctbal) + ELSE NULL + END AS no_debt_bal +FROM tpch.customer AS customer +LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey +WHERE + customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 +GROUP BY + customer.c_custkey diff --git a/tests/test_sql_refsols/country_x_year_analysis_trino.sql b/tests/test_sql_refsols/country_x_year_analysis_trino.sql new file mode 100644 index 000000000..173455ac9 --- /dev/null +++ b/tests/test_sql_refsols/country_x_year_analysis_trino.sql @@ -0,0 +1,63 @@ +WITH _t1 AS ( + SELECT + co_name + FROM main.countries + WHERE + NOT co_name LIKE '%C%' +), _t4 AS ( + SELECT + pr_name, + pr_release + FROM main.products + WHERE + pr_name = 'AmethystCopper-I' +), _s3 AS ( + SELECT + ca_dt + FROM main.calendar +), _s15 AS ( + SELECT + _s7.ca_dt, + _t6.co_name, + COUNT(*) AS n_rows + FROM _t1 AS _t6 + CROSS JOIN _t4 AS _t7 + JOIN _s3 AS _s7 + ON _s7.ca_dt < DATE_ADD('YEAR', 2, CAST(_t7.pr_release AS TIMESTAMP)) + AND _s7.ca_dt >= _t7.pr_release + JOIN main.devices AS devices + ON _s7.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN main.products AS products + ON devices.de_product_id = products.pr_id AND products.pr_name = 'AmethystCopper-I' + JOIN 
main.countries AS countries + ON _t6.co_name = countries.co_name + AND countries.co_id = devices.de_purchase_country_id + GROUP BY + 1, + 2 +), _s17 AS ( + SELECT + DATE_TRUNC('YEAR', CAST(_s3.ca_dt AS TIMESTAMP)) AS start_of_year, + _t3.co_name, + SUM(_s15.n_rows) AS sum_n_rows + FROM _t1 AS _t3 + CROSS JOIN _t4 AS _t4 + JOIN _s3 AS _s3 + ON _s3.ca_dt < DATE_ADD('YEAR', 2, CAST(_t4.pr_release AS TIMESTAMP)) + AND _s3.ca_dt >= _t4.pr_release + LEFT JOIN _s15 AS _s15 + ON _s15.ca_dt = _s3.ca_dt AND _s15.co_name = _t3.co_name + GROUP BY + 1, + 2 +) +SELECT + _t1.co_name AS country_name, + _s17.start_of_year, + COALESCE(_s17.sum_n_rows, 0) AS n_purchases +FROM _t1 AS _t1 +LEFT JOIN _s17 AS _s17 + ON _s17.co_name = _t1.co_name +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/country_x_year_combos_trino.sql b/tests/test_sql_refsols/country_x_year_combos_trino.sql new file mode 100644 index 000000000..1e7a894fd --- /dev/null +++ b/tests/test_sql_refsols/country_x_year_combos_trino.sql @@ -0,0 +1,26 @@ +WITH _t1 AS ( + SELECT + co_name + FROM main.countries + WHERE + NOT co_name LIKE '%C%' +), _s5 AS ( + SELECT DISTINCT + DATE_TRUNC('YEAR', CAST(calendar.ca_dt AS TIMESTAMP)) AS start_of_year, + _t3.co_name + FROM _t1 AS _t3 + JOIN main.products AS products + ON products.pr_name = 'AmethystCopper-I' + JOIN main.calendar AS calendar + ON calendar.ca_dt < DATE_ADD('YEAR', 2, CAST(products.pr_release AS TIMESTAMP)) + AND calendar.ca_dt >= products.pr_release +) +SELECT + _t1.co_name AS country_name, + _s5.start_of_year +FROM _t1 AS _t1 +LEFT JOIN _s5 AS _s5 + ON _s5.co_name = _t1.co_name +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_trino.sql b/tests/test_sql_refsols/cumulative_stock_analysis_trino.sql new file mode 100644 index 000000000..420d87549 --- /dev/null +++ b/tests/test_sql_refsols/cumulative_stock_analysis_trino.sql @@ -0,0 +1,32 @@ +SELECT + sbtransaction.sbtxdatetime AS 
date_time, + COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + CAST(( + 100.0 * SUM(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) AS DOUBLE) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + IF( + sbtransaction.sbtxtype = 'buy', + sbtransaction.sbtxshares, + 0 - sbtransaction.sbtxshares + ) + ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(CAST(sbtransaction.sbtxamount AS DOUBLE)) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + MONTH(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 4 + AND YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND sbtransaction.sbtxstatus = 'success' +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/customer_largest_order_deltas_trino.sql b/tests/test_sql_refsols/customer_largest_order_deltas_trino.sql new file mode 100644 index 000000000..c8e7ebc93 --- /dev/null +++ b/tests/test_sql_refsols/customer_largest_order_deltas_trino.sql @@ -0,0 +1,49 @@ +WITH _s1 AS ( + SELECT + l_discount, + l_extendedprice, + l_orderkey + FROM tpch.lineitem + WHERE + YEAR(CAST(l_shipdate AS TIMESTAMP)) = 1994 AND l_shipmode = 
'AIR' +), _t5 AS ( + SELECT + ARBITRARY(orders.o_custkey) AS anything_o_custkey, + ARBITRARY(orders.o_orderdate) AS anything_o_orderdate, + SUM(_s1.l_extendedprice * ( + 1 - _s1.l_discount + )) AS sum_r + FROM tpch.orders AS orders + LEFT JOIN _s1 AS _s1 + ON _s1.l_orderkey = orders.o_orderkey + WHERE + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1994 + GROUP BY + orders.o_orderkey +), _t AS ( + SELECT + anything_o_custkey, + anything_o_orderdate, + sum_r, + LAG(COALESCE(sum_r, 0), 1) OVER (PARTITION BY anything_o_custkey ORDER BY anything_o_orderdate) AS _w + FROM _t5 +), _t1 AS ( + SELECT + _t.anything_o_custkey, + customer.c_name, + COALESCE(_t.sum_r, 0) - LAG(COALESCE(_t.sum_r, 0), 1) OVER (PARTITION BY _t.anything_o_custkey ORDER BY _t.anything_o_orderdate) AS revenue_delta + FROM tpch.customer AS customer + JOIN _t AS _t + ON NOT _t._w IS NULL AND _t.anything_o_custkey = customer.c_custkey + WHERE + customer.c_mktsegment = 'AUTOMOBILE' +) +SELECT + ARBITRARY(c_name) AS name, + IF(ABS(MIN(revenue_delta)) > MAX(revenue_delta), MIN(revenue_delta), MAX(revenue_delta)) AS largest_diff +FROM _t1 +GROUP BY + anything_o_custkey +ORDER BY + 2 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/customer_most_recent_orders_trino.sql b/tests/test_sql_refsols/customer_most_recent_orders_trino.sql new file mode 100644 index 000000000..0305ae4d8 --- /dev/null +++ b/tests/test_sql_refsols/customer_most_recent_orders_trino.sql @@ -0,0 +1,25 @@ +WITH _t AS ( + SELECT + o_custkey, + o_totalprice, + ROW_NUMBER() OVER (PARTITION BY o_custkey ORDER BY o_orderdate DESC NULLS FIRST, o_orderkey) AS _w + FROM tpch.orders +), _s1 AS ( + SELECT + o_custkey, + SUM(o_totalprice) AS sum_o_totalprice + FROM _t + WHERE + _w <= 5 + GROUP BY + 1 +) +SELECT + customer.c_name AS name, + COALESCE(_s1.sum_o_totalprice, 0) AS total_recent_value +FROM tpch.customer AS customer +JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +ORDER BY + 2 DESC +LIMIT 3 diff --git 
a/tests/test_sql_refsols/dataframe_collection_best_trino.sql b/tests/test_sql_refsols/dataframe_collection_best_trino.sql new file mode 100644 index 000000000..b79ccc18d --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_best_trino.sql @@ -0,0 +1,27 @@ +WITH _t AS ( + SELECT + orders.o_custkey, + orders.o_orderkey, + orders.o_orderpriority, + orders.o_totalprice, + priority_taxes.tax_rate, + ROW_NUMBER() OVER (PARTITION BY orders.o_orderkey ORDER BY orders.o_totalprice + orders.o_totalprice * priority_taxes.tax_rate) AS _w + FROM tpch.orders AS orders + JOIN (VALUES + ('1-URGENT', 0.05), + ('2-HIGH', 0.04), + ('3-MEDIUM', 0.03), + ('4-NOT SPECIFIED', 0.02)) AS priority_taxes(priority_lvl, tax_rate) + ON orders.o_orderpriority = priority_taxes.priority_lvl +) +SELECT + customer.c_name AS name, + _t.o_orderkey AS order_key, + _t.o_orderpriority AS order_priority, + _t.o_totalprice + _t.o_totalprice * _t.tax_rate AS cheapest_order_price +FROM tpch.customer AS customer +JOIN _t AS _t + ON _t._w = 1 AND _t.o_custkey = customer.c_custkey +ORDER BY + 4 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/dataframe_collection_correlation_trino.sql b/tests/test_sql_refsols/dataframe_collection_correlation_trino.sql new file mode 100644 index 000000000..2c7d6bba3 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_correlation_trino.sql @@ -0,0 +1,32 @@ +WITH _s3 AS ( + SELECT + classes_2.key, + COUNT(*) AS n_rows + FROM (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes_2(key, class_name, language) + JOIN (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes_3(key, class_name, language) + ON 
classes_2.key <> classes_3.key AND classes_2.language = classes_3.language + GROUP BY + 1 +) +SELECT + classes.class_name, + classes.language, + COALESCE(_s3.n_rows, 0) AS n_other_classes +FROM (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes(key, class_name, language) +LEFT JOIN _s3 AS _s3 + ON _s3.key = classes.key diff --git a/tests/test_sql_refsols/dataframe_collection_cross_trino.sql b/tests/test_sql_refsols/dataframe_collection_cross_trino.sql new file mode 100644 index 000000000..5d764c11f --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_cross_trino.sql @@ -0,0 +1,18 @@ +SELECT + users.id_ AS id1, + users.name AS name1, + orders.order_id, + orders.amount +FROM (VALUES + (1, 'John'), + (2, 'Jane'), + (3, 'Bob'), + (4, 'Alice'), + (5, 'Charlie')) AS users(id_, name) +JOIN (VALUES + (101.0, 1.0, 250.0), + (102.0, 2.0, 150.5), + (103.0, 1.0, 300.0), + (104.0, 3.0, 450.75), + (105.0, 2.0, 200.0)) AS orders(order_id, user_id, amount) + ON orders.user_id = users.id_ diff --git a/tests/test_sql_refsols/dataframe_collection_datatypes_trino.sql b/tests/test_sql_refsols/dataframe_collection_datatypes_trino.sql new file mode 100644 index 000000000..8b7df6271 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_datatypes_trino.sql @@ -0,0 +1,14 @@ +SELECT + alldatatypes.string_col, + alldatatypes.int_col, + alldatatypes.float_col, + alldatatypes.nullable_int_col, + alldatatypes.bool_col, + alldatatypes.null_col, + alldatatypes.datetime_col +FROM (VALUES + ('red', 0, 1.5, 1.0, 1, NULL, CAST('2024-01-01 00:00:00' AS TIMESTAMP)), + ('orange', 1, 2.0, NULL, 0, NULL, CAST('2024-01-02 00:00:00' AS TIMESTAMP)), + (NULL, 2, NULL, 7.0, 0, NULL, NULL)) AS alldatatypes(string_col, int_col, float_col, nullable_int_col, bool_col, null_col, datetime_col) +ORDER BY + 2 NULLS FIRST 
diff --git a/tests/test_sql_refsols/dataframe_collection_highest_rating_trino.sql b/tests/test_sql_refsols/dataframe_collection_highest_rating_trino.sql new file mode 100644 index 000000000..5ab6b7be9 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_highest_rating_trino.sql @@ -0,0 +1,67 @@ +WITH _t AS ( + SELECT + classes.class_name, + teaching.semester, + teaching.teacher_id, + ROW_NUMBER() OVER (PARTITION BY classes.key ORDER BY teaching.rating DESC NULLS FIRST) AS _w + FROM (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes(key, class_name, language) + JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS teaching(class_key, teacher_id, semester, rating) + ON classes.key = teaching.class_key +) +SELECT + _t.class_name, + _t.semester AS last_semester, + teachers.first_name AS teacher_first_name, + 
teachers.last_name AS teacher_last_name +FROM _t AS _t +LEFT JOIN (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + ON _t.teacher_id = teachers.tid +WHERE + _t._w = 1 diff --git a/tests/test_sql_refsols/dataframe_collection_inf_trino.sql b/tests/test_sql_refsols/dataframe_collection_inf_trino.sql new file mode 100644 index 000000000..ccb7c9176 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_inf_trino.sql @@ -0,0 +1,9 @@ +SELECT + infinty.py_float, + infinty.np_float64, + infinty.np_float32 +FROM (VALUES + (1.5, -2.25, 0.0), + (NULL, NULL, NULL), + ('Infinity', 'Infinity', 'Infinity'), + ('-Infinity', '-Infinity', '-Infinity')) AS infinty(py_float, np_float64, np_float32) diff --git a/tests/test_sql_refsols/dataframe_collection_language_highest_rating_trino.sql b/tests/test_sql_refsols/dataframe_collection_language_highest_rating_trino.sql new file mode 100644 index 000000000..9fd5354d8 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_language_highest_rating_trino.sql @@ -0,0 +1,69 @@ +WITH _t AS ( + SELECT + classes.language, + teaching.rating, + teaching.teacher_id, + ROW_NUMBER() OVER (PARTITION BY classes.language ORDER BY teaching.rating DESC NULLS FIRST) AS _w + FROM (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes(key, class_name, language) + JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + 
(15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS teaching(class_key, teacher_id, semester, rating) + ON classes.key = teaching.class_key + WHERE + NOT classes.language IS NULL +) +SELECT + _t.language, + _t.rating, + teachers.first_name, + teachers.last_name +FROM _t AS _t +LEFT JOIN (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + ON _t.teacher_id = teachers.tid +WHERE + _t._w = 1 diff --git a/tests/test_sql_refsols/dataframe_collection_numbers_trino.sql b/tests/test_sql_refsols/dataframe_collection_numbers_trino.sql new file mode 100644 index 000000000..b196aef16 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_numbers_trino.sql @@ -0,0 +1,13 @@ +SELECT + numbers.pyd_numbers, + numbers.py_float, + numbers.np_float64, + numbers.np_float32, + numbers.null_vs_nan, + numbers.decimal_val +FROM (VALUES + (10.0, 1.5, 1.5, 1.5, NULL, 1.50), + (-3.0, 0.0, 0.0, 3.33333, NULL, 0.00), + (3.56, 10.0001, 4.4444444, 
0.0, NULL, -2.25), + (NULL, -2.25, -2.25, -2.25, 1.0, NULL), + (NULL, NULL, NULL, NULL, 0.0, NULL)) AS numbers(pyd_numbers, py_float, np_float64, np_float32, null_vs_nan, decimal_val) diff --git a/tests/test_sql_refsols/dataframe_collection_partition_trino.sql b/tests/test_sql_refsols/dataframe_collection_partition_trino.sql new file mode 100644 index 000000000..8ba739d39 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_partition_trino.sql @@ -0,0 +1,17 @@ +SELECT + products_collection.product_category, + AVG(products_collection.price) AS avg_price, + COUNT(*) AS n_products, + MIN(pricing_collection.discount) AS min_discount +FROM (VALUES + (1, 'A', 17.99), + (2, 'B', 45.65), + (3, 'A', 15.0), + (4, 'B', 10.99)) AS products_collection(product_id, product_category, price) +JOIN (VALUES + (1, 'A', 0.1), + (2, 'B', 0.15), + (3, 'C', 0.05)) AS pricing_collection(rule_id, rule_category, discount) + ON pricing_collection.rule_category = products_collection.product_category +GROUP BY + 1 diff --git a/tests/test_sql_refsols/dataframe_collection_strings_trino.sql b/tests/test_sql_refsols/dataframe_collection_strings_trino.sql new file mode 100644 index 000000000..1c9cf1ba6 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_strings_trino.sql @@ -0,0 +1,11 @@ +SELECT + strings.normal_strings, + strings.empty_string, + strings.special_characters +FROM (VALUES + ('hello', '', '''simple quoted'''), + ('world', 'not_empty', '"double quoted"'), + ('pydough', '', 'unicode_ß_ç_ü'), + (NULL, NULL, NULL), + ('test_string', ' ', 'tap_space newline_ +_test')) AS strings(normal_strings, empty_string, special_characters) diff --git a/tests/test_sql_refsols/dataframe_collection_taught_recently_trino.sql b/tests/test_sql_refsols/dataframe_collection_taught_recently_trino.sql new file mode 100644 index 000000000..1f45ffedb --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_taught_recently_trino.sql @@ -0,0 +1,67 @@ +WITH _t AS ( + SELECT + 
classes.class_name, + teaching.semester, + teaching.teacher_id, + ROW_NUMBER() OVER (PARTITION BY classes.key ORDER BY teaching.semester DESC NULLS FIRST) AS _w + FROM (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes(key, class_name, language) + JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS teaching(class_key, teacher_id, semester, rating) + ON classes.key = teaching.class_key +) +SELECT + _t.class_name, + _t.semester AS last_semester, + teachers.first_name AS teacher_first_name, + teachers.last_name AS teacher_last_name +FROM _t AS _t +LEFT JOIN (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 
'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + ON _t.teacher_id = teachers.tid +WHERE + _t._w = 1 diff --git a/tests/test_sql_refsols/dataframe_collection_teacher_class_trino.sql b/tests/test_sql_refsols/dataframe_collection_teacher_class_trino.sql new file mode 100644 index 000000000..4f71d3a71 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_teacher_class_trino.sql @@ -0,0 +1,68 @@ +WITH _t AS ( + SELECT + teaching.class_key, + teachers.first_name, + teachers.last_name, + teaching.semester, + ROW_NUMBER() OVER (PARTITION BY teachers.tid ORDER BY teaching.semester DESC NULLS FIRST) AS _w + FROM (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), 
+ (15251, 6, '2023-02-01', 4.99)) AS teaching(class_key, teacher_id, semester, rating) + ON teachers.tid = teaching.teacher_id +) +SELECT + _t.first_name, + _t.last_name, + _t.semester AS recent_semester, + classes.class_name +FROM _t AS _t +LEFT JOIN (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS classes(key, class_name, language) + ON _t.class_key = classes.key +WHERE + _t._w = 1 diff --git a/tests/test_sql_refsols/dataframe_collection_teacher_count_trino.sql b/tests/test_sql_refsols/dataframe_collection_teacher_count_trino.sql new file mode 100644 index 000000000..7719b69c4 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_teacher_count_trino.sql @@ -0,0 +1,85 @@ +SELECT + teachers.first_name, + teachers.last_name, + COUNT(*) AS n_teachers +FROM (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS 
teaching(class_key, teacher_id, semester, rating) +JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS teaching_2(class_key, teacher_id, semester, rating) + ON teaching.class_key = teaching_2.class_key + AND teaching.teacher_id <> teaching_2.teacher_id +JOIN (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + ON teachers.tid = teaching.teacher_id +GROUP BY + 1, + 2 diff --git a/tests/test_sql_refsols/dataframe_collection_teacher_lowest_rating_trino.sql b/tests/test_sql_refsols/dataframe_collection_teacher_lowest_rating_trino.sql new file mode 100644 index 000000000..08734e56a --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_teacher_lowest_rating_trino.sql @@ -0,0 +1,68 @@ +WITH _t AS ( + SELECT 
+ teaching.class_key, + teachers.first_name, + teachers.last_name, + teaching.rating, + ROW_NUMBER() OVER (PARTITION BY teachers.tid ORDER BY teaching.rating DESC NULLS FIRST) AS _w + FROM (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) + JOIN (VALUES + (15112, 1, '2020-09-01', 11.39), + (15122, 2, '2020-09-01', 9.22), + (15150, 9, '2020-09-01', 11.93), + (15210, 4, '2020-09-01', 0.32), + (15251, 5, '2020-09-01', 3.19), + (15112, 6, '2021-02-01', 1.35), + (15122, 1, '2021-02-01', 11.58), + (15150, 8, '2021-02-01', 2.69), + (15210, 9, '2021-02-01', 3.48), + (15251, 10, '2021-02-01', 6.75), + (15112, 5, '2021-09-01', 5.31), + (15122, 12, '2021-09-01', 3.94), + (15150, 1, '2021-09-01', 7.45), + (15210, 2, '2021-09-01', 8.64), + (15251, 9, '2021-09-01', 0.31), + (15112, 4, '2022-02-01', 11.27), + (15122, 5, '2022-02-01', 10.3), + (15150, 6, '2022-02-01', 2.21), + (15210, 1, '2022-02-01', 3.8), + (15251, 8, '2022-02-01', 7.87), + (15112, 9, '2022-09-01', 7.23), + (15122, 10, '2022-09-01', 6.66), + (15150, 5, '2022-09-01', 10.97), + (15210, 12, '2022-09-01', 0.96), + (15251, 1, '2022-09-01', 5.43), + (15112, 2, '2023-02-01', 5.19), + (15122, 9, '2023-02-01', 5.02), + (15150, 4, '2023-02-01', 9.73), + (15210, 5, '2023-02-01', 0.12), + (15251, 6, '2023-02-01', 4.99)) AS teaching(class_key, teacher_id, semester, rating) + ON teachers.tid = teaching.teacher_id +) +SELECT + _t.first_name, + _t.last_name, + _t.rating, + classes.class_name +FROM _t AS _t +LEFT JOIN (VALUES + (15112, 'Programming Fundamentals', 'Python'), + (15122, 'Imperative Programming', 'C'), + (15150, 'Functional Programming', 'SML'), + (15210, 'Parallel Algorithms', 'SML'), + (15251, 'Theoretical CS', NULL)) AS 
classes(key, class_name, language) + ON _t.class_key = classes.key +WHERE + _t._w = 1 diff --git a/tests/test_sql_refsols/dataframe_collection_top_k_trino.sql b/tests/test_sql_refsols/dataframe_collection_top_k_trino.sql new file mode 100644 index 000000000..65edcd735 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_top_k_trino.sql @@ -0,0 +1,21 @@ +SELECT + part.p_name AS name, + discounts.shipping_type, + lineitem.l_extendedprice AS extended_price, + lineitem.l_discount + discounts.added_discount AS added_discount, + lineitem.l_extendedprice * ( + 1 - ( + lineitem.l_discount + discounts.added_discount + ) + ) AS final_price +FROM (VALUES + ('REG AIR', 0.05), + ('SHIP', 0.06), + ('TRUCK', 0.05)) AS discounts(shipping_type, added_discount) +JOIN tpch.lineitem AS lineitem + ON discounts.shipping_type = lineitem.l_shipmode +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey +ORDER BY + 5 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/dataframe_collection_unique_partition_trino.sql b/tests/test_sql_refsols/dataframe_collection_unique_partition_trino.sql new file mode 100644 index 000000000..88a1dd945 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_unique_partition_trino.sql @@ -0,0 +1,16 @@ +SELECT DISTINCT + teachers.first_name, + teachers.last_name +FROM (VALUES + (1, 'Anil', 'Lee'), + (2, 'Mike', 'Lee'), + (3, 'Ian', 'Lee'), + (4, 'David', 'Smith'), + (5, 'Anil', 'Smith'), + (6, 'Mike', 'Smith'), + (7, 'Ian', 'Taylor'), + (8, 'David', 'Taylor'), + (9, 'Anil', 'Taylor'), + (10, 'Mike', 'Thomas'), + (11, 'Ian', 'Thomas'), + (12, 'David', 'Thomas')) AS teachers(tid, first_name, last_name) diff --git a/tests/test_sql_refsols/dataframe_collection_where_date_trino.sql b/tests/test_sql_refsols/dataframe_collection_where_date_trino.sql new file mode 100644 index 000000000..8dc52c096 --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_where_date_trino.sql @@ -0,0 +1,25 @@ +SELECT + dates.clerk_id, + COUNT(*) 
AS n_orders +FROM (VALUES + ( + 'Clerk#000000456', + CAST('1996-01-01 00:00:00' AS TIMESTAMP), + CAST('1996-02-01 00:00:00' AS TIMESTAMP) + ), + ( + 'Clerk#000000743', + CAST('1995-06-01 00:00:00' AS TIMESTAMP), + CAST('1995-07-01 00:00:00' AS TIMESTAMP) + ), + ( + 'Clerk#000000547', + CAST('1995-11-01 00:00:00' AS TIMESTAMP), + CAST('1995-12-01 00:00:00' AS TIMESTAMP) + )) AS dates(clerk_id, start_date, end_date) +JOIN tpch.orders AS orders + ON dates.clerk_id = orders.o_clerk + AND dates.end_date >= CAST(orders.o_orderdate AS TIMESTAMP) + AND dates.start_date <= CAST(orders.o_orderdate AS TIMESTAMP) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/dataframe_collection_where_trino.sql b/tests/test_sql_refsols/dataframe_collection_where_trino.sql new file mode 100644 index 000000000..de9df3d4d --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_where_trino.sql @@ -0,0 +1,67 @@ +WITH _s1 AS ( + SELECT + s_acctbal, + s_nationkey + FROM tpch.supplier +), _s2 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +), _s3 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s5 AS ( + SELECT + _s2.n_nationkey, + _s3.r_name + FROM _s2 AS _s2 + JOIN _s3 AS _s3 + ON _s2.n_regionkey = _s3.r_regionkey +), _s12 AS ( + SELECT DISTINCT + _s5.r_name + FROM (VALUES + ('AFRICA', 5000.32), + ('AMERICA', 8000.0), + ('ASIA', 4600.32), + ('EUROPE', 6400.5), + ('MIDDLE EAST', 8999.99)) AS thresholds_collection(region_name, min_account_balance) + JOIN _s1 AS _s1 + ON _s1.s_acctbal > thresholds_collection.min_account_balance + JOIN _s5 AS _s5 + ON _s1.s_nationkey = _s5.n_nationkey + AND _s5.r_name = thresholds_collection.region_name +), _s11 AS ( + SELECT + _s8.n_nationkey, + _s9.r_name + FROM _s2 AS _s8 + JOIN _s3 AS _s9 + ON _s8.n_regionkey = _s9.r_regionkey +), _s13 AS ( + SELECT + _s11.r_name, + COUNT(*) AS n_rows + FROM (VALUES + ('AFRICA', 5000.32), + ('AMERICA', 8000.0), + ('ASIA', 4600.32), + ('EUROPE', 6400.5), + ('MIDDLE EAST', 8999.99)) AS 
thresholds_collection_2(region_name, min_account_balance) + JOIN _s1 AS _s7 + ON _s7.s_acctbal > thresholds_collection_2.min_account_balance + JOIN _s11 AS _s11 + ON _s11.n_nationkey = _s7.s_nationkey + AND _s11.r_name = thresholds_collection_2.region_name + GROUP BY + 1 +) +SELECT + _s12.r_name AS sup_region_name, + _s13.n_rows AS n_suppliers +FROM _s12 AS _s12 +JOIN _s13 AS _s13 + ON _s12.r_name = _s13.r_name diff --git a/tests/test_sql_refsols/dataframe_collection_window_functions_trino.sql b/tests/test_sql_refsols/dataframe_collection_window_functions_trino.sql new file mode 100644 index 000000000..ca2ad18cb --- /dev/null +++ b/tests/test_sql_refsols/dataframe_collection_window_functions_trino.sql @@ -0,0 +1,81 @@ +WITH _t AS ( + SELECT + customer.c_acctbal, + customer.c_custkey, + customer.c_mktsegment, + customer.c_name, + customers_filters.mrk_segment, + nation.n_name, + customers_filters.nation_name, + NTILE(1000) OVER (ORDER BY customer.c_acctbal) AS _w + FROM (VALUES + ('UNITED STATES', 'BUILDING'), + ('JAPAN', 'AUTOMOBILE'), + ('BRAZIL', 'MACHINERY')) AS customers_filters(nation_name, mrk_segment) + CROSS JOIN tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +), _s6 AS ( + SELECT + _t.c_custkey, + ARBITRARY(_t.c_acctbal) AS anything_c_acctbal, + ARBITRARY(_t.c_name) AS anything_c_name, + ARBITRARY(_t.nation_name) AS anything_nation_name, + COUNT(orders.o_custkey) AS count_o_custkey + FROM _t AS _t + LEFT JOIN tpch.orders AS orders + ON _t.c_custkey = orders.o_custkey + WHERE + _t._w > 996 AND _t.c_mktsegment = _t.mrk_segment AND _t.n_name = _t.nation_name + GROUP BY + 1 +), _t4 AS ( + SELECT + o_custkey, + DATE_DIFF( + 'MONTH', + CAST(LAG(o_orderdate, 1) OVER (PARTITION BY o_custkey ORDER BY o_orderdate) AS TIMESTAMP), + CAST(o_orderdate AS TIMESTAMP) + ) AS month_diff + FROM tpch.orders +), _s7 AS ( + SELECT + o_custkey, + AVG(month_diff) AS avg_month_diff + FROM _t4 + GROUP BY + 1 +), _t6 AS ( + 
SELECT + o_custkey, + o_totalprice - LEAD(o_totalprice, 1) OVER (PARTITION BY o_custkey ORDER BY o_orderdate) AS price_diff + FROM tpch.orders +), _s9 AS ( + SELECT + o_custkey, + AVG(price_diff) AS avg_price_diff + FROM _t6 + GROUP BY + 1 +) +SELECT + _s6.anything_c_name AS name, + ROW_NUMBER() OVER (PARTITION BY _s6.anything_nation_name ORDER BY _s6.anything_c_acctbal DESC NULLS FIRST) AS ranking_balance, + COALESCE(_s6.count_o_custkey, 0) AS n_orders, + _s7.avg_month_diff AS avg_month_orders, + _s9.avg_price_diff, + CAST(_s6.anything_c_acctbal AS DOUBLE) / SUM(_s6.anything_c_acctbal) OVER () AS proportion, + IF( + _s6.anything_c_acctbal > AVG(CAST(_s6.anything_c_acctbal AS DOUBLE)) OVER (), + TRUE, + FALSE + ) AS above_avg, + COUNT(_s6.anything_c_acctbal) OVER (ORDER BY _s6.anything_c_acctbal ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_poorer, + CAST(_s6.anything_c_acctbal AS DOUBLE) / COUNT(*) OVER () AS ratio +FROM _s6 AS _s6 +LEFT JOIN _s7 AS _s7 + ON _s6.c_custkey = _s7.o_custkey +LEFT JOIN _s9 AS _s9 + ON _s6.c_custkey = _s9.o_custkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/datediff_trino.sql b/tests/test_sql_refsols/datediff_trino.sql new file mode 100644 index 000000000..5da3563f8 --- /dev/null +++ b/tests/test_sql_refsols/datediff_trino.sql @@ -0,0 +1,16 @@ +SELECT + sbtxdatetime AS x, + CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, + CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, + DATE_DIFF('YEAR', CAST(sbtxdatetime AS TIMESTAMP), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS years_diff, + DATE_DIFF('MONTH', CAST(sbtxdatetime AS TIMESTAMP), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS months_diff, + DATE_DIFF('DAY', CAST(sbtxdatetime AS TIMESTAMP), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS days_diff, + DATE_DIFF('HOUR', CAST(sbtxdatetime AS TIMESTAMP), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS hours_diff, + DATE_DIFF('MINUTE', CAST(sbtxdatetime AS TIMESTAMP), CAST('2023-04-03 13:16:30' AS TIMESTAMP)) AS 
minutes_diff, + DATE_DIFF('SECOND', CAST(sbtxdatetime AS TIMESTAMP), CAST('2023-04-03 13:16:30' AS TIMESTAMP)) AS seconds_diff +FROM main.sbtransaction +WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 +ORDER BY + 4 NULLS FIRST +LIMIT 30 diff --git a/tests/test_sql_refsols/datetime_current_trino.sql b/tests/test_sql_refsols/datetime_current_trino.sql new file mode 100644 index 000000000..009d92a9d --- /dev/null +++ b/tests/test_sql_refsols/datetime_current_trino.sql @@ -0,0 +1,10 @@ +SELECT + DATE_ADD('DAY', -1, DATE_ADD('MONTH', 5, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP))) AS d1, + DATE_ADD('HOUR', 24, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) AS d2, + DATE_ADD( + 'SECOND', + 2, + DATE_ADD('MINUTE', -150, DATE_ADD('HOUR', 12, DATE_TRUNC('DAY', CURRENT_TIMESTAMP))) + ) AS d3 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/datetime_functions_trino.sql b/tests/test_sql_refsols/datetime_functions_trino.sql new file mode 100644 index 000000000..66be05da4 --- /dev/null +++ b/tests/test_sql_refsols/datetime_functions_trino.sql @@ -0,0 +1,87 @@ +SELECT + CURRENT_TIMESTAMP AS ts_now_1, + DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS ts_now_2, + DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) AS ts_now_3, + DATE_ADD('HOUR', 1, CURRENT_TIMESTAMP) AS ts_now_4, + CAST('2025-01-01' AS DATE) AS ts_now_5, + CAST('1995-10-08' AS DATE) AS ts_now_6, + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_col, + 2020 AS year_py, + 1995 AS year_pd, + MONTH(CAST(o_orderdate AS TIMESTAMP)) AS month_col, + 2 AS month_str, + 1 AS month_dt, + DAY(CAST(o_orderdate AS TIMESTAMP)) AS day_col, + 25 AS day_str, + 23 AS hour_str, + 59 AS minute_str, + 59 AS second_ts, + DATE_DIFF('DAY', CAST(o_orderdate AS TIMESTAMP), CAST('1992-01-01' AS TIMESTAMP)) AS dd_col_str, + DATE_DIFF('DAY', CAST('1992-01-01' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS dd_str_col, + DATE_DIFF('MONTH', CAST('1995-10-10 00:00:00' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS dd_pd_col, + DATE_DIFF('YEAR', 
CAST(o_orderdate AS TIMESTAMP), CAST('1992-01-01 12:30:45' AS TIMESTAMP)) AS dd_col_dt, + DATE_DIFF('WEEK', CAST('1992-01-01' AS TIMESTAMP), CAST('1992-01-01 12:30:45' AS TIMESTAMP)) AS dd_dt_str, + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 AS dow_col, + 3 AS dow_str1, + 4 AS dow_str2, + 5 AS dow_str3, + 6 AS dow_str4, + 0 AS dow_str5, + 1 AS dow_str6, + 2 AS dow_str7, + 3 AS dow_dt, + 2 AS dow_pd, + CASE + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname_col, + 'Monday' AS dayname_str1, + 'Tuesday' AS dayname_str2, + 'Wednesday' AS dayname_str3, + 'Thursday' AS dayname_str4, + 'Friday' AS dayname_str5, + 'Saturday' AS dayname_str6, + 'Sunday' AS dayname_dt +FROM tpch.orders diff --git a/tests/test_sql_refsols/datetime_relative_trino.sql b/tests/test_sql_refsols/datetime_relative_trino.sql new file mode 100644 index 000000000..29f59ea99 --- /dev/null +++ b/tests/test_sql_refsols/datetime_relative_trino.sql @@ -0,0 +1,35 @@ +WITH _t0 AS ( + SELECT + o_orderdate + FROM tpch.orders + ORDER BY + o_custkey NULLS FIRST, + 1 NULLS FIRST + LIMIT 10 +) +SELECT + DATE_TRUNC('YEAR', CAST(o_orderdate AS TIMESTAMP)) AS d1, + DATE_TRUNC('MONTH', CAST(o_orderdate AS TIMESTAMP)) AS d2, + DATE_ADD( + 'SECOND', + 1, + DATE_ADD( + 'MINUTE', + -3, + DATE_ADD( + 'HOUR', + 5, + DATE_ADD( + 'DAY', + -7, + DATE_ADD('MONTH', 9, DATE_ADD('YEAR', -11, CAST(o_orderdate AS TIMESTAMP))) + ) + ) + ) + ) AS d3, + CAST('2025-07-04 12:00:00' AS TIMESTAMP) AS d4, + CAST('2025-07-04 12:58:00' AS 
TIMESTAMP) AS d5, + CAST('2025-07-26 02:45:25' AS TIMESTAMP) AS d6 +FROM _t0 +ORDER BY + o_orderdate NULLS FIRST diff --git a/tests/test_sql_refsols/datetime_sampler_trino.sql b/tests/test_sql_refsols/datetime_sampler_trino.sql new file mode 100644 index 000000000..1f818fb7f --- /dev/null +++ b/tests/test_sql_refsols/datetime_sampler_trino.sql @@ -0,0 +1,164 @@ +SELECT + CAST('2025-07-04 12:58:45' AS TIMESTAMP) AS _expr0, + CAST('2024-12-31 11:59:00' AS TIMESTAMP) AS _expr1, + CAST('2025-01-01' AS DATE) AS _expr2, + CAST('1999-03-14' AS DATE) AS _expr3, + CURRENT_TIMESTAMP AS _expr4, + CURRENT_TIMESTAMP AS _expr5, + CURRENT_TIMESTAMP AS _expr6, + CURRENT_TIMESTAMP AS _expr7, + CURRENT_TIMESTAMP AS _expr8, + CURRENT_TIMESTAMP AS _expr9, + CURRENT_TIMESTAMP AS _expr10, + CURRENT_TIMESTAMP AS _expr11, + CURRENT_TIMESTAMP AS _expr12, + CURRENT_TIMESTAMP AS _expr13, + CURRENT_TIMESTAMP AS _expr14, + CURRENT_TIMESTAMP AS _expr15, + CURRENT_TIMESTAMP AS _expr16, + CURRENT_TIMESTAMP AS _expr17, + CURRENT_TIMESTAMP AS _expr18, + CAST(o_orderdate AS TIMESTAMP) AS _expr19, + DATE_TRUNC('SECOND', CURRENT_TIMESTAMP) AS _expr20, + DATE_ADD('MONTH', -141, DATE_ADD('MINUTE', 8, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP))) AS _expr21, + DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('MONTH', CURRENT_TIMESTAMP))) AS _expr22, + DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP))) AS _expr23, + DATE_ADD('YEAR', 15, DATE_ADD('HOUR', -96, CURRENT_TIMESTAMP)) AS _expr24, + DATE_ADD( + 'MONTH', + 65, + DATE_TRUNC('MINUTE', DATE_ADD('YEAR', -3, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP))) + ) AS _expr25, + DATE_TRUNC('YEAR', DATE_ADD('HOUR', -56, CAST(o_orderdate AS TIMESTAMP))) AS _expr26, + DATE_TRUNC('SECOND', DATE_TRUNC('MINUTE', DATE_ADD('DAY', -63, CURRENT_TIMESTAMP))) AS _expr27, + DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) AS _expr28, + DATE_ADD('YEAR', 48, DATE_TRUNC('SECOND', DATE_ADD('HOUR', -312, CURRENT_TIMESTAMP))) AS _expr29, + DATE_ADD( + 'DAY', + -294, + 
DATE_ADD('MINUTE', 600, DATE_TRUNC('DAY', DATE_ADD('DAY', 75, CURRENT_TIMESTAMP))) + ) AS _expr30, + DATE_ADD('YEAR', -45, DATE_ADD('MONTH', 480, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP))) AS _expr31, + DATE_TRUNC( + 'SECOND', + DATE_TRUNC('DAY', DATE_ADD('SECOND', -34, DATE_ADD('MINUTE', -270, CURRENT_TIMESTAMP))) + ) AS _expr32, + DATE_ADD('SECOND', 213, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) AS _expr33, + DATE_ADD( + 'SECOND', + 344, + DATE_ADD('YEAR', 28, DATE_ADD('MINUTE', 13, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP))) + ) AS _expr34, + DATE_TRUNC('DAY', CURRENT_TIMESTAMP) AS _expr35, + CAST('2116-01-01 00:49:00' AS TIMESTAMP) AS _expr36, + DATE_TRUNC('DAY', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP)) AS _expr37, + DATE_TRUNC('YEAR', DATE_TRUNC('DAY', CURRENT_TIMESTAMP)) AS _expr38, + CAST('2025-07-01 00:22:00' AS TIMESTAMP) AS _expr39, + DATE_TRUNC('YEAR', CURRENT_TIMESTAMP) AS _expr40, + DATE_TRUNC( + 'YEAR', + DATE_ADD( + 'SECOND', + -160, + DATE_ADD('SECOND', 415, DATE_ADD('SECOND', 82, CAST(o_orderdate AS TIMESTAMP))) + ) + ) AS _expr41, + DATE_ADD('MONTH', 192, CURRENT_TIMESTAMP) AS _expr42, + DATE_ADD( + 'MINUTE', + 486, + DATE_TRUNC('HOUR', DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP))) + ) AS _expr43, + DATE_ADD('HOUR', -50, DATE_TRUNC('SECOND', CURRENT_TIMESTAMP)) AS _expr44, + DATE_TRUNC( + 'HOUR', + DATE_ADD('MONTH', -92, DATE_ADD('MONTH', 72, DATE_ADD('DAY', 297, CURRENT_TIMESTAMP))) + ) AS _expr45, + DATE_TRUNC('DAY', DATE_ADD('SECOND', 285, CURRENT_TIMESTAMP)) AS _expr46, + CAST('1999-05-15' AS DATE) AS _expr47, + DATE_ADD( + 'DAY', + -21, + DATE_TRUNC('MONTH', DATE_ADD('HOUR', 1, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP))) + ) AS _expr48, + DATE_ADD('YEAR', 368, DATE_ADD('MINUTE', 212, CURRENT_TIMESTAMP)) AS _expr49, + CAST('2024-01-01' AS DATE) AS _expr50, + CAST('1999-03-14' AS DATE) AS _expr51, + DATE_ADD( + 'YEAR', + 196, + DATE_TRUNC('MINUTE', DATE_TRUNC('DAY', DATE_ADD('HOUR', -60, CURRENT_TIMESTAMP))) + ) AS _expr52, + DATE_ADD( + 
'HOUR', + 29, + DATE_TRUNC('MINUTE', DATE_ADD('DAY', -385, DATE_ADD('HOUR', -40, CURRENT_TIMESTAMP))) + ) AS _expr53, + DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', DATE_ADD('DAY', 405, CURRENT_TIMESTAMP))) AS _expr54, + DATE_ADD( + 'MONTH', + 96, + DATE_ADD('YEAR', 98, DATE_TRUNC('SECOND', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP))) + ) AS _expr55, + DATE_ADD( + 'SECOND', + 78, + DATE_TRUNC('DAY', DATE_TRUNC('SECOND', DATE_TRUNC('MINUTE', CURRENT_TIMESTAMP))) + ) AS _expr56, + DATE_ADD( + 'DAY', + 312, + DATE_ADD('MONTH', -104, DATE_ADD('MINUTE', 104, DATE_ADD('HOUR', 136, CURRENT_TIMESTAMP))) + ) AS _expr57, + DATE_ADD('SECOND', -135, DATE_ADD('MONTH', 45, CURRENT_TIMESTAMP)) AS _expr58, + YEAR(CURRENT_TIMESTAMP) AS _expr59, + 2025 AS _expr60, + 1999 AS _expr61, + MONTH(CURRENT_TIMESTAMP) AS _expr62, + 6 AS _expr63, + 3 AS _expr64, + DAY(CURRENT_TIMESTAMP) AS _expr65, + 4 AS _expr66, + 4 AS _expr67, + HOUR(CURRENT_TIMESTAMP) AS _expr68, + 0 AS _expr69, + 0 AS _expr70, + MINUTE(CURRENT_TIMESTAMP) AS _expr71, + 30 AS _expr72, + 0 AS _expr73, + SECOND(CURRENT_TIMESTAMP) AS _expr74, + 45 AS _expr75, + 0 AS _expr76, + DATE_DIFF('YEAR', CAST('2018-02-14 12:41:06' AS TIMESTAMP), CURRENT_TIMESTAMP) AS _expr77, + DATE_DIFF('YEAR', CAST(o_orderdate AS TIMESTAMP), CAST('2022-11-24' AS DATE)) AS _expr78, + DATE_DIFF('MONTH', CAST('2005-06-30' AS DATE), CAST('1999-03-14' AS TIMESTAMP)) AS _expr79, + DATE_DIFF('MONTH', CAST('2006-05-01 12:00:00' AS TIMESTAMP), CAST('2022-11-24' AS DATE)) AS _expr80, + DATE_DIFF('DAY', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) AS _expr81, + DATE_DIFF('DAY', CAST('1999-03-14' AS TIMESTAMP), CURRENT_TIMESTAMP) AS _expr82, + DATE_DIFF('HOUR', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) AS _expr83, + DATE_DIFF('HOUR', CAST('2005-06-30' AS DATE), CAST(o_orderdate AS TIMESTAMP)) AS _expr84, + DATE_DIFF('MINUTE', CURRENT_TIMESTAMP, CAST('2006-05-01 12:00:00' AS TIMESTAMP)) AS _expr85, + DATE_DIFF('MINUTE', CAST(o_orderdate AS TIMESTAMP), CAST('2021-01-01 07:35:00' AS 
TIMESTAMP)) AS _expr86, + DATE_DIFF('SECOND', CAST('2022-11-24' AS DATE), CAST('2021-01-01 07:35:00' AS TIMESTAMP)) AS _expr87, + DATE_DIFF('SECOND', CAST('2005-06-30' AS DATE), CAST('2018-02-14 12:41:06' AS TIMESTAMP)) AS _expr88, + DATE_DIFF('YEAR', CAST(o_orderdate AS TIMESTAMP), CAST('2006-05-01 12:00:00' AS TIMESTAMP)) AS _expr89, + DATE_DIFF('YEAR', CAST('2018-02-14 12:41:06' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS _expr90, + DATE_DIFF('MONTH', CAST(o_orderdate AS TIMESTAMP), CAST('2019-07-04 11:30:00' AS TIMESTAMP)) AS _expr91, + DATE_DIFF( + 'MONTH', + CAST('2019-07-04 11:30:00' AS TIMESTAMP), + CAST('2018-02-14 12:41:06' AS TIMESTAMP) + ) AS _expr92, + DATE_DIFF('DAY', CURRENT_TIMESTAMP, CAST(o_orderdate AS TIMESTAMP)) AS _expr93, + DATE_DIFF('DAY', CAST('2019-07-04 11:30:00' AS TIMESTAMP), CURRENT_TIMESTAMP) AS _expr94, + DATE_DIFF('HOUR', CAST('2022-11-24' AS DATE), CAST('1999-03-14' AS TIMESTAMP)) AS _expr95, + DATE_DIFF( + 'HOUR', + CAST('2018-02-14 12:41:06' AS TIMESTAMP), + CAST('2020-12-31 00:31:06' AS TIMESTAMP) + ) AS _expr96, + DATE_DIFF('MINUTE', CAST('2005-06-30' AS DATE), CAST('2020-12-31 00:31:06' AS TIMESTAMP)) AS _expr97, + DATE_DIFF('MINUTE', CURRENT_TIMESTAMP, CAST('2018-02-14 12:41:06' AS TIMESTAMP)) AS _expr98, + DATE_DIFF('SECOND', CURRENT_TIMESTAMP, CAST('1999-03-14' AS TIMESTAMP)) AS _expr99, + DATE_DIFF('SECOND', CAST('2022-11-24' AS DATE), CAST('2019-07-04 11:30:00' AS TIMESTAMP)) AS _expr100 +FROM tpch.orders diff --git a/tests/test_sql_refsols/deep_best_analysis_trino.sql b/tests/test_sql_refsols/deep_best_analysis_trino.sql new file mode 100644 index 000000000..5ea23ecda --- /dev/null +++ b/tests/test_sql_refsols/deep_best_analysis_trino.sql @@ -0,0 +1,109 @@ +WITH _t2 AS ( + SELECT + c_acctbal, + c_custkey, + c_nationkey + FROM tpch.customer +), _t AS ( + SELECT + c_acctbal, + c_custkey, + c_nationkey, + ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY c_acctbal DESC NULLS FIRST, c_custkey) AS _w + FROM _t2 +), 
_s4 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +), _t_2 AS ( + SELECT + _s5.c_acctbal, + _s4.n_nationkey, + _s4.n_regionkey, + ROW_NUMBER() OVER (PARTITION BY _s4.n_regionkey ORDER BY _s5.c_acctbal DESC NULLS FIRST, _s5.c_custkey) AS _w + FROM _s4 AS _s4 + JOIN _t2 AS _s5 + ON _s4.n_nationkey = _s5.c_nationkey +), _s7 AS ( + SELECT + c_acctbal, + n_nationkey, + n_regionkey + FROM _t_2 + WHERE + _w = 1 +), _t_3 AS ( + SELECT + _s8.n_nationkey, + _s8.n_regionkey, + partsupp.ps_availqty, + partsupp.ps_partkey, + supplier.s_nationkey, + supplier.s_suppkey, + ROW_NUMBER() OVER (PARTITION BY _s8.n_regionkey ORDER BY partsupp.ps_availqty DESC NULLS FIRST, partsupp.ps_partkey) AS _w + FROM _s4 AS _s8 + JOIN tpch.supplier AS supplier + ON _s8.n_nationkey = supplier.s_nationkey + JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +), _t_4 AS ( + SELECT + n_nationkey, + n_regionkey, + ps_availqty, + ps_partkey, + s_nationkey, + s_suppkey, + ROW_NUMBER() OVER (PARTITION BY n_regionkey ORDER BY ps_availqty DESC NULLS FIRST, s_suppkey) AS _w + FROM _t_3 + WHERE + _w = 1 +), _s13 AS ( + SELECT + n_nationkey, + n_regionkey, + ps_availqty, + ps_partkey, + s_suppkey + FROM _t_4 + WHERE + _w = 1 AND n_nationkey = s_nationkey +), _t_5 AS ( + SELECT + c_custkey, + c_nationkey, + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC NULLS FIRST, c_custkey) AS _w + FROM _t2 +), _s15 AS ( + SELECT + c_custkey, + c_nationkey + FROM _t_5 + WHERE + _w = 1 +) +SELECT + region.r_name, + nation.n_name, + _t.c_custkey AS c_key, + _t.c_acctbal AS c_bal, + _s7.c_acctbal AS cr_bal, + _s13.s_suppkey AS s_key, + _s13.ps_partkey AS p_key, + _s13.ps_availqty AS p_qty, + _s15.c_custkey AS cg_key +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN _t AS _t + ON _t._w = 1 AND _t.c_nationkey = nation.n_nationkey +LEFT JOIN _s7 AS _s7 + ON _s7.n_nationkey = nation.n_nationkey AND _s7.n_regionkey = region.r_regionkey 
+LEFT JOIN _s13 AS _s13 + ON _s13.n_nationkey = nation.n_nationkey AND _s13.n_regionkey = region.r_regionkey +LEFT JOIN _s15 AS _s15 + ON _s15.c_nationkey = nation.n_nationkey +ORDER BY + 2 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_academic_gen10_trino.sql b/tests/test_sql_refsols/defog_academic_gen10_trino.sql new file mode 100644 index 000000000..1626c5df3 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen10_trino.sql @@ -0,0 +1,5 @@ +SELECT + title +FROM main.publication +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_academic_gen11_trino.sql b/tests/test_sql_refsols/defog_academic_gen11_trino.sql new file mode 100644 index 000000000..91deda330 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen11_trino.sql @@ -0,0 +1,13 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.publication +), _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.author +) +SELECT + CAST(_s0.n_rows AS DOUBLE) / NULLIF(_s1.n_rows, 0) AS publication_to_author_ratio +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_academic_gen12_trino.sql b/tests/test_sql_refsols/defog_academic_gen12_trino.sql new file mode 100644 index 000000000..db47a635b --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen12_trino.sql @@ -0,0 +1,3 @@ +SELECT + CAST(SUM(NOT cid IS NULL) AS DOUBLE) / NULLIF(SUM(NOT jid IS NULL), 0) AS ratio +FROM main.publication diff --git a/tests/test_sql_refsols/defog_academic_gen13_trino.sql b/tests/test_sql_refsols/defog_academic_gen13_trino.sql new file mode 100644 index 000000000..9ded51138 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen13_trino.sql @@ -0,0 +1,23 @@ +WITH _s1 AS ( + SELECT + did, + COUNT(*) AS n_rows + FROM main.domain_publication + GROUP BY + 1 +), _s3 AS ( + SELECT + did, + COUNT(*) AS n_rows + FROM main.domain_keyword + GROUP BY + 1 +) +SELECT + domain.did AS domain_id, + CAST(COALESCE(_s1.n_rows, 0) AS DOUBLE) / NULLIF(_s3.n_rows, 0) AS 
ratio +FROM main.domain AS domain +LEFT JOIN _s1 AS _s1 + ON _s1.did = domain.did +LEFT JOIN _s3 AS _s3 + ON _s3.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen14_trino.sql b/tests/test_sql_refsols/defog_academic_gen14_trino.sql new file mode 100644 index 000000000..b41d4b069 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen14_trino.sql @@ -0,0 +1,8 @@ +SELECT + year, + COUNT(*) AS num_publications, + COUNT(DISTINCT jid) AS num_journals, + CAST(COUNT(*) AS DOUBLE) / NULLIF(COUNT(DISTINCT jid), 0) AS ratio +FROM main.publication +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_academic_gen15_trino.sql b/tests/test_sql_refsols/defog_academic_gen15_trino.sql new file mode 100644 index 000000000..c12bc5262 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen15_trino.sql @@ -0,0 +1,18 @@ +WITH _t1 AS ( + SELECT + ARBITRARY(organization.continent) AS anything_continent, + COUNT(author.oid) AS count_oid + FROM main.organization AS organization + LEFT JOIN main.author AS author + ON author.oid = organization.oid + GROUP BY + organization.oid +) +SELECT + anything_continent AS continent, + CAST(COALESCE(SUM(count_oid), 0) AS DOUBLE) / COUNT(*) AS ratio +FROM _t1 +GROUP BY + 1 +ORDER BY + 2 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen16_trino.sql b/tests/test_sql_refsols/defog_academic_gen16_trino.sql new file mode 100644 index 000000000..08a3f601e --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen16_trino.sql @@ -0,0 +1,19 @@ +WITH _s3 AS ( + SELECT + writes.aid, + COUNT(DISTINCT publication.pid) AS ndistinct_pid + FROM main.writes AS writes + JOIN main.publication AS publication + ON publication.pid = writes.pid AND publication.year = 2021 + GROUP BY + 1 +) +SELECT + author.name, + _s3.ndistinct_pid AS count_publication +FROM main.author AS author +JOIN _s3 AS _s3 + ON _s3.aid = author.aid +ORDER BY + 2 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_academic_gen17_trino.sql 
b/tests/test_sql_refsols/defog_academic_gen17_trino.sql new file mode 100644 index 000000000..75eef2a11 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen17_trino.sql @@ -0,0 +1,11 @@ +SELECT + ARBITRARY(conference.name) AS name, + COUNT(publication.cid) AS count_publications +FROM main.conference AS conference +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid +GROUP BY + conference.cid +ORDER BY + 2 DESC, + 1 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen18_trino.sql b/tests/test_sql_refsols/defog_academic_gen18_trino.sql new file mode 100644 index 000000000..5e74d9732 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen18_trino.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + jid, + COUNT(*) AS n_rows + FROM main.publication + GROUP BY + 1 +) +SELECT + journal.name, + journal.jid AS journal_id, + COALESCE(_s1.n_rows, 0) AS num_publications +FROM main.journal AS journal +LEFT JOIN _s1 AS _s1 + ON _s1.jid = journal.jid +ORDER BY + 3 DESC diff --git a/tests/test_sql_refsols/defog_academic_gen19_trino.sql b/tests/test_sql_refsols/defog_academic_gen19_trino.sql new file mode 100644 index 000000000..c36c9104a --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen19_trino.sql @@ -0,0 +1,11 @@ +SELECT + ARBITRARY(conference.name) AS name, + COUNT(publication.cid) AS num_publications +FROM main.conference AS conference +LEFT JOIN main.publication AS publication + ON conference.cid = publication.cid +GROUP BY + conference.cid +ORDER BY + 2 DESC, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_academic_gen1_trino.sql b/tests/test_sql_refsols/defog_academic_gen1_trino.sql new file mode 100644 index 000000000..dcbb778aa --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen1_trino.sql @@ -0,0 +1,18 @@ +WITH _t0 AS ( + SELECT + writes.aid, + COUNT(DISTINCT domain_publication.did) AS ndistinct_did + FROM main.writes AS writes + JOIN main.domain_publication AS domain_publication + ON 
domain_publication.pid = writes.pid + JOIN main.domain AS domain + ON domain.did = domain_publication.did + AND domain.name IN ('Data Science', 'Machine Learning') + GROUP BY + 1 +) +SELECT + author.name +FROM main.author AS author +JOIN _t0 AS _t0 + ON _t0.aid = author.aid AND _t0.ndistinct_did = 2 diff --git a/tests/test_sql_refsols/defog_academic_gen20_trino.sql b/tests/test_sql_refsols/defog_academic_gen20_trino.sql new file mode 100644 index 000000000..d22bd2904 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen20_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM main.publication AS publication +JOIN main.journal AS journal + ON STARTS_WITH(LOWER(journal.name), 'j') AND journal.jid = publication.jid diff --git a/tests/test_sql_refsols/defog_academic_gen21_trino.sql b/tests/test_sql_refsols/defog_academic_gen21_trino.sql new file mode 100644 index 000000000..e0c5c9c96 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen21_trino.sql @@ -0,0 +1,21 @@ +WITH _u_0 AS ( + SELECT + author.oid AS _u_1 + FROM main.author AS author + JOIN main.writes AS writes + ON author.aid = writes.aid + JOIN main.domain_publication AS domain_publication + ON domain_publication.pid = writes.pid + JOIN main.domain AS domain + ON domain.did = domain_publication.did AND domain.name = 'Machine Learning' + GROUP BY + 1 +) +SELECT + organization.name AS oranization_name, + organization.oid AS organization_id +FROM main.organization AS organization +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = organization.oid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_academic_gen22_trino.sql b/tests/test_sql_refsols/defog_academic_gen22_trino.sql new file mode 100644 index 000000000..53ec2291e --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen22_trino.sql @@ -0,0 +1,24 @@ +WITH _s0 AS ( + SELECT + aid, + did + FROM main.domain_author +), _u_0 AS ( + SELECT + _s0.aid AS _u_1 + FROM _s0 AS _s0 + JOIN _s0 AS _s1 + ON _s0.did = _s1.did + JOIN 
main.author AS author + ON LOWER(author.name) LIKE '%martin%' AND _s1.aid = author.aid + GROUP BY + 1 +) +SELECT + author.name, + author.aid AS author_id +FROM main.author AS author +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = author.aid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_academic_gen23_trino.sql b/tests/test_sql_refsols/defog_academic_gen23_trino.sql new file mode 100644 index 000000000..4104c0016 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen23_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + oid AS _u_1 + FROM main.organization + GROUP BY + 1 +) +SELECT + author.name, + author.aid AS author_id +FROM main.author AS author +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = author.oid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_academic_gen24_trino.sql b/tests/test_sql_refsols/defog_academic_gen24_trino.sql new file mode 100644 index 000000000..8b9e27d18 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen24_trino.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + domain_conference.cid AS _u_1, + writes.pid AS _u_2 + FROM main.writes AS writes + JOIN main.domain_author AS domain_author + ON domain_author.aid = writes.aid + JOIN main.domain AS domain + ON LOWER(domain.name) LIKE '%sociology%' AND domain.did = domain_author.did + JOIN main.domain_conference AS domain_conference + ON domain.did = domain_conference.did + GROUP BY + 1, + 2 +) +SELECT + publication.title +FROM main.publication AS publication +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = publication.cid AND _u_0._u_2 = publication.pid +WHERE + NOT _u_0._u_1 IS NULL AND publication.year = 2020 diff --git a/tests/test_sql_refsols/defog_academic_gen25_trino.sql b/tests/test_sql_refsols/defog_academic_gen25_trino.sql new file mode 100644 index 000000000..41c74bcd0 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen25_trino.sql @@ -0,0 +1,9 @@ +SELECT DISTINCT + author.name AS author_name +FROM main.author AS author +JOIN 
main.writes AS writes + ON author.aid = writes.aid +JOIN main.domain_publication AS domain_publication + ON domain_publication.pid = writes.pid +JOIN main.domain AS domain + ON domain.did = domain_publication.did AND domain.name = 'Computer Science' diff --git a/tests/test_sql_refsols/defog_academic_gen2_trino.sql b/tests/test_sql_refsols/defog_academic_gen2_trino.sql new file mode 100644 index 000000000..fd86a1bce --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen2_trino.sql @@ -0,0 +1,16 @@ +WITH _s3 AS ( + SELECT + writes.aid, + SUM(publication.citation_num) AS sum_citation_num + FROM main.writes AS writes + JOIN main.publication AS publication + ON publication.pid = writes.pid + GROUP BY + 1 +) +SELECT + author.name, + COALESCE(_s3.sum_citation_num, 0) AS total_citations +FROM main.author AS author +JOIN _s3 AS _s3 + ON _s3.aid = author.aid diff --git a/tests/test_sql_refsols/defog_academic_gen3_trino.sql b/tests/test_sql_refsols/defog_academic_gen3_trino.sql new file mode 100644 index 000000000..4a1ae7ad1 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen3_trino.sql @@ -0,0 +1,6 @@ +SELECT + year, + COUNT(*) AS _expr0 +FROM main.publication +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_academic_gen4_trino.sql b/tests/test_sql_refsols/defog_academic_gen4_trino.sql new file mode 100644 index 000000000..51b5364c7 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen4_trino.sql @@ -0,0 +1,16 @@ +WITH _s3 AS ( + SELECT + domain_publication.did, + AVG(publication.reference_num) AS avg_reference_num + FROM main.domain_publication AS domain_publication + JOIN main.publication AS publication + ON domain_publication.pid = publication.pid + GROUP BY + 1 +) +SELECT + domain.name, + _s3.avg_reference_num AS average_references +FROM main.domain AS domain +LEFT JOIN _s3 AS _s3 + ON _s3.did = domain.did diff --git a/tests/test_sql_refsols/defog_academic_gen5_trino.sql b/tests/test_sql_refsols/defog_academic_gen5_trino.sql new file mode 
100644 index 000000000..8dc5824dd --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen5_trino.sql @@ -0,0 +1,6 @@ +SELECT + year, + AVG(citation_num) AS average_citations +FROM main.publication +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_academic_gen6_trino.sql b/tests/test_sql_refsols/defog_academic_gen6_trino.sql new file mode 100644 index 000000000..5e08ea7b2 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen6_trino.sql @@ -0,0 +1,6 @@ +SELECT + title +FROM main.publication +ORDER BY + citation_num DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_academic_gen7_trino.sql b/tests/test_sql_refsols/defog_academic_gen7_trino.sql new file mode 100644 index 000000000..a1b749d7e --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen7_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + did, + COUNT(DISTINCT aid) AS ndistinct_aid + FROM main.domain_author + GROUP BY + 1 +) +SELECT + domain.name, + COALESCE(_s1.ndistinct_aid, 0) AS author_count +FROM main.domain AS domain +LEFT JOIN _s1 AS _s1 + ON _s1.did = domain.did +ORDER BY + 2 DESC, + 1 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_academic_gen8_trino.sql b/tests/test_sql_refsols/defog_academic_gen8_trino.sql new file mode 100644 index 000000000..eb520cb36 --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen8_trino.sql @@ -0,0 +1,6 @@ +SELECT + title +FROM main.publication +ORDER BY + reference_num DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_academic_gen9_trino.sql b/tests/test_sql_refsols/defog_academic_gen9_trino.sql new file mode 100644 index 000000000..66a2dc9ba --- /dev/null +++ b/tests/test_sql_refsols/defog_academic_gen9_trino.sql @@ -0,0 +1,7 @@ +SELECT + title, + citation_num +FROM main.publication +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_adv10_trino.sql b/tests/test_sql_refsols/defog_broker_adv10_trino.sql new file mode 100644 index 000000000..bcc41bffe --- /dev/null +++ 
b/tests/test_sql_refsols/defog_broker_adv10_trino.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) AS month_sbtxdatetime, + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) AS year_sbtxdatetime, + sbtxcustid, + COUNT(*) AS n_rows + FROM main.sbtransaction + GROUP BY + 1, + 2, + 3 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.month_sbtxdatetime = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid + AND _s1.year_sbtxdatetime = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) +ORDER BY + 3 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv11_trino.sql b/tests/test_sql_refsols/defog_broker_adv11_trino.sql new file mode 100644 index 000000000..e1dc3aa43 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv11_trino.sql @@ -0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + sbtransaction.sbtxcustid AS _u_1 + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickersymbol IN ('AMZN', 'AAPL', 'GOOGL', 'META', 'NFLX') + GROUP BY + 1 +) +SELECT + COUNT(*) AS n_customers +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + NOT _u_0._u_1 IS NULL AND sbcustomer.sbcustemail LIKE '%.com' diff --git a/tests/test_sql_refsols/defog_broker_adv12_trino.sql b/tests/test_sql_refsols/defog_broker_adv12_trino.sql new file mode 100644 index 000000000..17d3b056c --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv12_trino.sql @@ -0,0 +1,8 @@ +SELECT + COUNT(*) AS n_customers +FROM main.sbcustomer +WHERE + ( + LOWER(sbcustname) LIKE '%ez' OR STARTS_WITH(LOWER(sbcustname), 'j') + ) + AND LOWER(sbcuststate) LIKE '%a' diff --git a/tests/test_sql_refsols/defog_broker_adv13_trino.sql 
b/tests/test_sql_refsols/defog_broker_adv13_trino.sql new file mode 100644 index 000000000..011c5ba86 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv13_trino.sql @@ -0,0 +1,8 @@ +SELECT + sbcustcountry AS cust_country, + COUNT(*) AS TAC +FROM main.sbcustomer +WHERE + sbcustjoindate >= CAST('2023-01-01' AS DATE) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_trino.sql b/tests/test_sql_refsols/defog_broker_adv14_trino.sql new file mode 100644 index 000000000..8f2a5d689 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv14_trino.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice + WHERE + DATE_DIFF('DAY', CAST(sbdpdate AS TIMESTAMP), CURRENT_TIMESTAMP) <= 7 + GROUP BY + 1 +) +SELECT + sbticker.sbtickertype AS ticker_type, + CAST(SUM(_s1.sum_sbdpclose) AS DOUBLE) / SUM(_s1.count_sbdpclose) AS ACP +FROM main.sbticker AS sbticker +JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv15_trino.sql b/tests/test_sql_refsols/defog_broker_adv15_trino.sql new file mode 100644 index 000000000..432af2b1d --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv15_trino.sql @@ -0,0 +1,10 @@ +SELECT + sbcustcountry AS country, + 100 * ( + CAST(COALESCE(SUM(sbcuststatus = 'active'), 0) AS DOUBLE) / COUNT(*) + ) AS ar +FROM main.sbcustomer +WHERE + sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv16_trino.sql b/tests/test_sql_refsols/defog_broker_adv16_trino.sql new file mode 100644 index 000000000..29f888f82 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv16_trino.sql @@ -0,0 +1,23 @@ +WITH _s1 AS ( + SELECT + sbtxtickerid, + SUM(sbtxtax + sbtxcommission) AS sum_expr, + SUM(sbtxamount) AS sum_sbtxamount + FROM main.sbtransaction + WHERE + sbtxdatetime >= 
DATE_ADD('MONTH', -1, CURRENT_TIMESTAMP) AND sbtxtype = 'sell' + GROUP BY + 1 +) +SELECT + sbticker.sbtickersymbol AS symbol, + ( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr, 0) + ) + ) / NULLIF(_s1.sum_sbtxamount, 0) AS SPM +FROM main.sbticker AS sbticker +JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_broker_adv1_trino.sql b/tests/test_sql_refsols/defog_broker_adv1_trino.sql new file mode 100644 index 000000000..82fa342df --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv1_trino.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + sbtxcustid, + SUM(sbtxamount) AS sum_sbtxamount + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustname AS name, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 2 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv2_trino.sql b/tests/test_sql_refsols/defog_broker_adv2_trino.sql new file mode 100644 index 000000000..ce670556c --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv2_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + sbtxtickerid, + COUNT(*) AS n_rows + FROM main.sbtransaction + WHERE + sbtxdatetime >= DATE_TRUNC('DAY', DATE_ADD('DAY', -10, CURRENT_TIMESTAMP)) + AND sbtxtype = 'buy' + GROUP BY + 1 +) +SELECT + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS tx_count +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 2 DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv3_trino.sql b/tests/test_sql_refsols/defog_broker_adv3_trino.sql new file mode 100644 index 000000000..bd8a6ff2b --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv3_trino.sql @@ -0,0 +1,19 @@ +WITH _t1 AS ( + SELECT + sbtxcustid, + COUNT(*) AS n_rows, + SUM(sbtxstatus = 'success') AS sum_expr + FROM 
main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustname AS name, + ( + 100.0 * COALESCE(_t1.sum_expr, 0) + ) / _t1.n_rows AS success_rate +FROM main.sbcustomer AS sbcustomer +JOIN _t1 AS _t1 + ON _t1.n_rows >= 5 AND _t1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_broker_adv4_trino.sql b/tests/test_sql_refsols/defog_broker_adv4_trino.sql new file mode 100644 index 000000000..54cfd9b6b --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv4_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + sbdptickerid, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow + FROM main.sbdailyprice + WHERE + sbdpdate <= CAST('2023-04-04' AS DATE) AND sbdpdate >= CAST('2023-04-01' AS DATE) + GROUP BY + 1 +) +SELECT + sbticker.sbtickersymbol AS symbol, + _s1.max_sbdphigh - _s1.min_sbdplow AS price_change +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_adv5_trino.sql b/tests/test_sql_refsols/defog_broker_adv5_trino.sql new file mode 100644 index 000000000..d4905a008 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv5_trino.sql @@ -0,0 +1,46 @@ +WITH _s0 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(sbdpdate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbdpdate AS TIMESTAMP)), 2, '0') + ) AS month, + sbdptickerid, + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + SUM(sbdpclose) AS sum_sbdpclose + FROM main.sbdailyprice + GROUP BY + 1, + 2 +), _t0 AS ( + SELECT + _s0.month, + sbticker.sbtickersymbol, + MAX(_s0.max_sbdphigh) AS max_max_sbdphigh, + MIN(_s0.min_sbdplow) AS min_min_sbdplow, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid + GROUP BY + 1, + 2 +) +SELECT + sbtickersymbol AS 
symbol, + month, + CAST(sum_sum_sbdpclose AS DOUBLE) / sum_count_sbdpclose AS avg_close, + max_max_sbdphigh AS max_high, + min_min_sbdplow AS min_low, + CAST(( + ( + CAST(sum_sum_sbdpclose AS DOUBLE) / sum_count_sbdpclose + ) - LAG(CAST(sum_sum_sbdpclose AS DOUBLE) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) AS DOUBLE) / NULLIF( + LAG(CAST(sum_sum_sbdpclose AS DOUBLE) / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month), + 0 + ) AS momc +FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv6_trino.sql b/tests/test_sql_refsols/defog_broker_adv6_trino.sql new file mode 100644 index 000000000..74a9df6e7 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv6_trino.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + sbtxcustid, + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustname AS name, + _s1.n_rows AS num_tx, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sbtxamount, 0) DESC NULLS FIRST) AS cust_rank +FROM main.sbcustomer AS sbcustomer +JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid diff --git a/tests/test_sql_refsols/defog_broker_adv7_trino.sql b/tests/test_sql_refsols/defog_broker_adv7_trino.sql new file mode 100644 index 000000000..c823fc9ab --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv7_trino.sql @@ -0,0 +1,40 @@ +WITH _s2 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(sbcustjoindate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbcustjoindate AS TIMESTAMP)), 2, '0') + ) AS month, + COUNT(*) AS n_rows + FROM main.sbcustomer + WHERE + sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND sbcustjoindate >= DATE_TRUNC('MONTH', DATE_ADD('MONTH', -6, CURRENT_TIMESTAMP)) + GROUP BY + 1 +), _s3 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)), 2, '0') + ) AS 
month, + AVG(sbtransaction.sbtxamount) AS avg_sbtxamount + FROM main.sbcustomer AS sbcustomer + JOIN main.sbtransaction AS sbtransaction + ON MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) = MONTH(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + AND YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) = YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid + WHERE + sbcustomer.sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND sbcustomer.sbcustjoindate >= DATE_TRUNC('MONTH', DATE_ADD('MONTH', -6, CURRENT_TIMESTAMP)) + GROUP BY + 1 +) +SELECT + _s2.month, + _s2.n_rows AS customer_signups, + _s3.avg_sbtxamount AS avg_tx_amount +FROM _s2 AS _s2 +LEFT JOIN _s3 AS _s3 + ON _s2.month = _s3.month diff --git a/tests/test_sql_refsols/defog_broker_adv8_trino.sql b/tests/test_sql_refsols/defog_broker_adv8_trino.sql new file mode 100644 index 000000000..cf501bb0f --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv8_trino.sql @@ -0,0 +1,40 @@ +SELECT + NULLIF(COUNT(*), 0) AS n_transactions, + COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid +WHERE + sbtransaction.sbtxdatetime < DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + AND sbtransaction.sbtxdatetime >= DATE_ADD( + 'WEEK', + -1, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + ) diff --git a/tests/test_sql_refsols/defog_broker_adv9_trino.sql b/tests/test_sql_refsols/defog_broker_adv9_trino.sql new file mode 100644 index 000000000..6f8b46b9f --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv9_trino.sql @@ -0,0 +1,69 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATE_ADD( + 
'DAY', + ( + ( + ( + ( + DAY_OF_WEEK(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) % 7 + ) + 1 + ) + -1 + ) % 7 + ) * -1, + CAST(sbtransaction.sbtxdatetime AS TIMESTAMP) + ) + ) AS week, + COUNT(*) AS num_transactions, + COALESCE( + SUM( + ( + ( + ( + DAY_OF_WEEK(sbtransaction.sbtxdatetime) % 7 + ) + 0 + ) % 7 + ) IN (5, 6) + ), + 0 + ) AS weekend_transactions +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickertype = 'stock' +WHERE + sbtransaction.sbtxdatetime < DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + AND sbtransaction.sbtxdatetime >= DATE_ADD( + 'WEEK', + -8, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + ) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_basic10_trino.sql b/tests/test_sql_refsols/defog_broker_basic10_trino.sql new file mode 100644 index 000000000..1b3a16cf5 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic10_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + sbdptickerid AS _u_1 + FROM main.sbdailyprice + GROUP BY + 1 +) +SELECT + sbticker.sbtickerid AS _id, + sbticker.sbtickersymbol AS symbol +FROM main.sbticker AS sbticker +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbticker.sbtickerid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic1_trino.sql b/tests/test_sql_refsols/defog_broker_basic1_trino.sql new file mode 100644 index 000000000..effd5bf32 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic1_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + sbtxcustid, + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount + FROM main.sbtransaction + WHERE + sbtxdatetime >= DATE_TRUNC('DAY', DATE_ADD('DAY', -30, CURRENT_TIMESTAMP)) + GROUP BY + 1 +) +SELECT + 
sbcustomer.sbcustcountry AS country, + COALESCE(SUM(_s1.n_rows), 0) AS num_transactions, + COALESCE(SUM(_s1.sum_sbtxamount), 0) AS total_amount +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_basic2_trino.sql b/tests/test_sql_refsols/defog_broker_basic2_trino.sql new file mode 100644 index 000000000..8a86160b1 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic2_trino.sql @@ -0,0 +1,14 @@ +SELECT + sbtxtype AS transaction_type, + COUNT(DISTINCT sbtxcustid) AS num_customers, + AVG(sbtxshares) AS avg_shares +FROM main.sbtransaction +WHERE + sbtxdatetime <= CAST('2023-03-31' AS DATE) + AND sbtxdatetime >= CAST('2023-01-01' AS DATE) +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic3_trino.sql b/tests/test_sql_refsols/defog_broker_basic3_trino.sql new file mode 100644 index 000000000..33d84cee0 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic3_trino.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + sbtxtickerid, + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 3 DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_basic4_trino.sql b/tests/test_sql_refsols/defog_broker_basic4_trino.sql new file mode 100644 index 000000000..fa6a1ff5c --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic4_trino.sql @@ -0,0 +1,34 @@ +WITH _s0 AS ( + SELECT + sbtxcustid, + sbtxtickerid, + COUNT(*) AS n_rows + FROM main.sbtransaction + GROUP BY + 1, + 2 +), _s2 AS ( + SELECT + sbticker.sbtickertype, + _s0.sbtxcustid, + SUM(_s0.n_rows) AS sum_n_rows + FROM _s0 AS _s0 + JOIN 
main.sbticker AS sbticker + ON _s0.sbtxtickerid = sbticker.sbtickerid + GROUP BY + 1, + 2 +) +SELECT + sbcustomer.sbcuststate AS state, + _s2.sbtickertype AS ticker_type, + SUM(_s2.sum_n_rows) AS num_transactions +FROM _s2 AS _s2 +JOIN main.sbcustomer AS sbcustomer + ON _s2.sbtxcustid = sbcustomer.sbcustid +GROUP BY + 1, + 2 +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic5_trino.sql b/tests/test_sql_refsols/defog_broker_basic5_trino.sql new file mode 100644 index 000000000..669d92e42 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic5_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + sbtxcustid AS _u_1 + FROM main.sbtransaction + WHERE + sbtxtype = 'buy' + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS _id +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic6_trino.sql b/tests/test_sql_refsols/defog_broker_basic6_trino.sql new file mode 100644 index 000000000..170c215bd --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic6_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + sbdptickerid AS _u_1 + FROM main.sbdailyprice + WHERE + sbdpdate >= CAST('2023-04-01' AS DATE) + GROUP BY + 1 +) +SELECT + sbticker.sbtickerid AS _id +FROM main.sbticker AS sbticker +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbticker.sbtickerid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic7_trino.sql b/tests/test_sql_refsols/defog_broker_basic7_trino.sql new file mode 100644 index 000000000..d8f4a45fb --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic7_trino.sql @@ -0,0 +1,9 @@ +SELECT + sbtxstatus AS status, + COUNT(*) AS num_transactions +FROM main.sbtransaction +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic8_trino.sql b/tests/test_sql_refsols/defog_broker_basic8_trino.sql new file mode 100644 index 
000000000..21c4e9f66 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic8_trino.sql @@ -0,0 +1,9 @@ +SELECT + sbcustcountry AS country, + COUNT(*) AS num_customers +FROM main.sbcustomer +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic9_trino.sql b/tests/test_sql_refsols/defog_broker_basic9_trino.sql new file mode 100644 index 000000000..f96baada7 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic9_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + sbtxcustid AS _u_1 + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_gen1_trino.sql b/tests/test_sql_refsols/defog_broker_gen1_trino.sql new file mode 100644 index 000000000..e662c392a --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen1_trino.sql @@ -0,0 +1,8 @@ +SELECT + MIN(sbdailyprice.sbdpclose) AS lowest_price +FROM main.sbdailyprice AS sbdailyprice +JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + AND sbticker.sbtickersymbol = 'VTI' +WHERE + DATE_DIFF('DAY', CAST(sbdailyprice.sbdpdate AS TIMESTAMP), CURRENT_TIMESTAMP) <= 7 diff --git a/tests/test_sql_refsols/defog_broker_gen2_trino.sql b/tests/test_sql_refsols/defog_broker_gen2_trino.sql new file mode 100644 index 000000000..f856be023 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen2_trino.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS transaction_count +FROM main.sbtransaction AS sbtransaction +JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + AND sbcustomer.sbcustjoindate >= DATE_TRUNC('DAY', DATE_ADD('DAY', -70, CURRENT_TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_broker_gen3_trino.sql b/tests/test_sql_refsols/defog_broker_gen3_trino.sql new file mode 100644 
index 000000000..5e374bff3 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen3_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + sbtxcustid, + MIN(sbtxdatetime) AS min_sbtxdatetime + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS cust_id, + CAST(DATE_DIFF( + 'SECOND', + CAST(sbcustomer.sbcustjoindate AS TIMESTAMP), + CAST(_s1.min_sbtxdatetime AS TIMESTAMP) + ) AS DOUBLE) / 86400.0 AS DaysFromJoinToFirstTransaction +FROM main.sbcustomer AS sbcustomer +JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid diff --git a/tests/test_sql_refsols/defog_broker_gen4_trino.sql b/tests/test_sql_refsols/defog_broker_gen4_trino.sql new file mode 100644 index 000000000..29a88a75d --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen4_trino.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + sbtxcustid, + COUNT(*) AS n_rows + FROM main.sbtransaction + WHERE + CAST(sbtxdatetime AS TIMESTAMP) < CAST('2023-04-02' AS DATE) + AND CAST(sbtxdatetime AS TIMESTAMP) >= CAST('2023-04-01' AS DATE) + AND sbtxtype = 'sell' + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_tx +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 3 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_gen5_trino.sql b/tests/test_sql_refsols/defog_broker_gen5_trino.sql new file mode 100644 index 000000000..b51e767ee --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen5_trino.sql @@ -0,0 +1,12 @@ +SELECT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month, + AVG(sbtxprice) AS avg_price +FROM main.sbtransaction +WHERE + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (1, 2, 3) + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 + AND sbtxstatus = 'success' +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv10_trino.sql b/tests/test_sql_refsols/defog_dealership_adv10_trino.sql new 
file mode 100644 index 000000000..1b3532f06 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv10_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + sale_id, + MAX(payment_date) AS max_payment_date + FROM main.payments_received + GROUP BY + 1 +) +SELECT + ROUND( + AVG( + DATE_DIFF('DAY', CAST(sales.sale_date AS TIMESTAMP), CAST(_s1.max_payment_date AS TIMESTAMP)) + ), + 2 + ) AS avg_days_to_payment +FROM main.sales AS sales +LEFT JOIN _s1 AS _s1 + ON _s1.sale_id = sales._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_trino.sql b/tests/test_sql_refsols/defog_dealership_adv11_trino.sql new file mode 100644 index 000000000..0656dd117 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv11_trino.sql @@ -0,0 +1,19 @@ +WITH _s0 AS ( + SELECT + car_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales + WHERE + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 + GROUP BY + 1 +) +SELECT + ( + CAST(( + COALESCE(SUM(_s0.sum_sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) AS DOUBLE) / NULLIF(SUM(cars.cost), 0) + ) * 100 AS GPM +FROM _s0 AS _s0 +JOIN main.cars AS cars + ON _s0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_dealership_adv12_trino.sql b/tests/test_sql_refsols/defog_dealership_adv12_trino.sql new file mode 100644 index 000000000..ab2c3cee5 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv12_trino.sql @@ -0,0 +1,14 @@ +SELECT + cars.make, + cars.model, + sales.sale_price +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +JOIN main.inventory_snapshots AS inventory_snapshots + ON NOT inventory_snapshots.is_in_inventory + AND cars._id = inventory_snapshots.car_id + AND inventory_snapshots.snapshot_date = sales.sale_date +ORDER BY + 3 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv13_trino.sql b/tests/test_sql_refsols/defog_dealership_adv13_trino.sql new file mode 100644 index 000000000..a7ad60e2d --- /dev/null +++ 
b/tests/test_sql_refsols/defog_dealership_adv13_trino.sql @@ -0,0 +1,36 @@ +WITH _s1 AS ( + SELECT + DATE_TRUNC('MONTH', CAST(payment_date AS TIMESTAMP)) AS start_month, + SUM(payment_amount) AS sum_payment_amount + FROM main.payments_received + GROUP BY + 1 +), _t0 AS ( + SELECT + DATE_ADD('N', CAST(_s1.start_month AS BIGINT), 'month') AS dt, + SUM(IF(months.n > 0, 0, COALESCE(_s1.sum_payment_amount, 0))) AS sum_payment + FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9), + (10), + (11)) AS months(n) + JOIN _s1 AS _s1 + ON DATE_ADD('N', CAST(_s1.start_month AS BIGINT), 'month') <= DATE_ADD('HOUR', 1, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) + GROUP BY + 1 +) +SELECT + dt, + sum_payment AS total_payments, + sum_payment - LAG(sum_payment, 1) OVER (ORDER BY dt) AS MoM_change +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv14_trino.sql b/tests/test_sql_refsols/defog_dealership_adv14_trino.sql new file mode 100644 index 000000000..ec41ac12a --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv14_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS TSC +FROM main.sales +WHERE + DATE_DIFF('DAY', CAST(sale_date AS TIMESTAMP), CURRENT_TIMESTAMP) <= 7 diff --git a/tests/test_sql_refsols/defog_dealership_adv15_trino.sql b/tests/test_sql_refsols/defog_dealership_adv15_trino.sql new file mode 100644 index 000000000..0677b90ce --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv15_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + AVG(sale_price) AS avg_sale_price + FROM main.sales + GROUP BY + 1 +) +SELECT + salespersons.first_name, + salespersons.last_name, + _s1.avg_sale_price AS ASP +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_trino.sql b/tests/test_sql_refsols/defog_dealership_adv16_trino.sql new file mode 100644 index 
000000000..701ecc22c --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv16_trino.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales + GROUP BY + 1 +) +SELECT + salespersons._id, + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_adv1_trino.sql b/tests/test_sql_refsols/defog_dealership_adv1_trino.sql new file mode 100644 index 000000000..f9844dfd6 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv1_trino.sql @@ -0,0 +1,38 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + ( + DAY_OF_WEEK(CAST(payments_received.payment_date AS TIMESTAMP)) % 7 + ) + 1 + ) + -1 + ) % 7 + ) * -1, + CAST(payments_received.payment_date AS TIMESTAMP) + ) + ) AS payment_week, + COUNT(*) AS total_payments, + COALESCE( + SUM( + ( + ( + ( + DAY_OF_WEEK(payments_received.payment_date) % 7 + ) + 0 + ) % 7 + ) IN (5, 6) + ), + 0 + ) AS weekend_payments +FROM main.payments_received AS payments_received +JOIN main.sales AS sales + ON payments_received.sale_id = sales._id AND sales.sale_price > 30000 +WHERE + DATE_DIFF('WEEK', CAST(payments_received.payment_date AS TIMESTAMP), CURRENT_TIMESTAMP) <= 8 + AND DATE_DIFF('WEEK', CAST(payments_received.payment_date AS TIMESTAMP), CURRENT_TIMESTAMP) >= 1 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv2_trino.sql b/tests/test_sql_refsols/defog_dealership_adv2_trino.sql new file mode 100644 index 000000000..e75179a8e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv2_trino.sql @@ -0,0 +1,21 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + COUNT(*) AS n_rows + FROM main.sales + WHERE + DATE_DIFF('DAY', CAST(sale_date AS TIMESTAMP), CURRENT_TIMESTAMP) <= 30 + GROUP BY + 1 +) +SELECT + 
salespersons._id, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS num_sales +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv3_trino.sql b/tests/test_sql_refsols/defog_dealership_adv3_trino.sql new file mode 100644 index 000000000..79da086fa --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv3_trino.sql @@ -0,0 +1,21 @@ +WITH _t1 AS ( + SELECT + ARBITRARY(cars.make) AS anything_make, + ARBITRARY(cars.model) AS anything_model, + COUNT(sales.car_id) AS count_car_id + FROM main.cars AS cars + LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id + WHERE + LOWER(cars.vin_number) LIKE '%m5%' + GROUP BY + cars._id +) +SELECT + anything_make AS make, + anything_model AS model, + COALESCE(SUM(count_car_id), 0) AS num_sales +FROM _t1 +GROUP BY + 1, + 2 diff --git a/tests/test_sql_refsols/defog_dealership_adv4_trino.sql b/tests/test_sql_refsols/defog_dealership_adv4_trino.sql new file mode 100644 index 000000000..49b4de645 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv4_trino.sql @@ -0,0 +1,9 @@ +SELECT + COUNT(*) AS num_sales, + CASE WHEN COUNT(*) <> 0 THEN COALESCE(SUM(sales.sale_price), 0) ELSE NULL END AS total_revenue +FROM main.cars AS cars +JOIN main.sales AS sales + ON cars._id = sales.car_id + AND sales.sale_date >= DATE_ADD('DAY', -30, CURRENT_TIMESTAMP) +WHERE + LOWER(cars.make) LIKE '%toyota%' diff --git a/tests/test_sql_refsols/defog_dealership_adv5_trino.sql b/tests/test_sql_refsols/defog_dealership_adv5_trino.sql new file mode 100644 index 000000000..ad83aebfa --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv5_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price + FROM main.sales + GROUP BY + 1 +) +SELECT + salespersons.first_name, + salespersons.last_name, + 
COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC NULLS FIRST) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv6_trino.sql b/tests/test_sql_refsols/defog_dealership_adv6_trino.sql new file mode 100644 index 000000000..1cb84ff0c --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv6_trino.sql @@ -0,0 +1,20 @@ +WITH _t AS ( + SELECT + car_id, + is_in_inventory, + ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC NULLS FIRST) AS _w + FROM main.inventory_snapshots +) +SELECT + ARBITRARY(cars.make) AS make, + ARBITRARY(cars.model) AS model, + MAX(sales.sale_price) AS highest_sale_price +FROM main.cars AS cars +JOIN _t AS _t + ON NOT _t.is_in_inventory AND _t._w = 1 AND _t.car_id = cars._id +LEFT JOIN main.sales AS sales + ON cars._id = sales.car_id +GROUP BY + cars._id +ORDER BY + 3 DESC diff --git a/tests/test_sql_refsols/defog_dealership_adv7_trino.sql b/tests/test_sql_refsols/defog_dealership_adv7_trino.sql new file mode 100644 index 000000000..259104aa4 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv7_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + car_id, + AVG(sale_price) AS avg_sale_price + FROM main.sales + GROUP BY + 1 +) +SELECT + cars.make, + cars.model, + cars.year, + cars.color, + cars.vin_number, + _s1.avg_sale_price +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + LOWER(cars.make) LIKE '%fords%' OR LOWER(cars.model) LIKE '%mustang%' diff --git a/tests/test_sql_refsols/defog_dealership_adv8_trino.sql b/tests/test_sql_refsols/defog_dealership_adv8_trino.sql new file mode 100644 index 000000000..da058f233 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv8_trino.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + DATE_TRUNC('MONTH', 
CAST(sales.sale_date AS TIMESTAMP)) AS sale_month, + COUNT(*) AS n_rows, + SUM(sales.sale_price) AS sum_sale_price + FROM main.sales AS sales + JOIN main.salespersons AS salespersons + ON YEAR(CAST(salespersons.hire_date AS TIMESTAMP)) <= 2023 + AND YEAR(CAST(salespersons.hire_date AS TIMESTAMP)) >= 2022 + AND sales.salesperson_id = salespersons._id + GROUP BY + 1 +) +SELECT + DATE_FORMAT(DATE_TRUNC('MONTH', months_range.dt), '%Y-%m-%d') AS month, + COALESCE(_s3.n_rows, 0) AS PMSPS, + COALESCE(_s3.sum_sale_price, 0) AS PMSR +FROM (VALUES + (CAST('2025-09-01 00:00:00' AS TIMESTAMP)), + (CAST('2025-10-01 00:00:00' AS TIMESTAMP)), + (CAST('2025-11-01 00:00:00' AS TIMESTAMP)), + (CAST('2025-12-01 00:00:00' AS TIMESTAMP)), + (CAST('2026-01-01 00:00:00' AS TIMESTAMP)), + (CAST('2026-02-01 00:00:00' AS TIMESTAMP))) AS months_range(dt) +LEFT JOIN _s3 AS _s3 + ON _s3.sale_month = DATE_TRUNC('MONTH', months_range.dt) +ORDER BY + DATE_TRUNC('MONTH', months_range.dt) NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv9_trino.sql b/tests/test_sql_refsols/defog_dealership_adv9_trino.sql new file mode 100644 index 000000000..a581d84e5 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv9_trino.sql @@ -0,0 +1,5 @@ +SELECT + AVG(sale_price) AS ASP +FROM main.sales +WHERE + sale_date <= '2023-03-31' AND sale_date >= '2023-01-01' diff --git a/tests/test_sql_refsols/defog_dealership_basic10_trino.sql b/tests/test_sql_refsols/defog_dealership_basic10_trino.sql new file mode 100644 index 000000000..120927e70 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic10_trino.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price + FROM main.sales + WHERE + sale_date >= DATE_ADD('MONTH', -3, CURRENT_TIMESTAMP) + GROUP BY + 1 +) +SELECT + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM 
main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic1_trino.sql b/tests/test_sql_refsols/defog_dealership_basic1_trino.sql new file mode 100644 index 000000000..bbd756397 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic1_trino.sql @@ -0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + car_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + cars._id, + cars.make, + cars.model, + cars.year +FROM main.cars AS cars +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = cars._id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic2_trino.sql b/tests/test_sql_refsols/defog_dealership_basic2_trino.sql new file mode 100644 index 000000000..16cb73183 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic2_trino.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + customer_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + customers._id +FROM main.customers AS customers +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customers._id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic3_trino.sql b/tests/test_sql_refsols/defog_dealership_basic3_trino.sql new file mode 100644 index 000000000..4310aba26 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic3_trino.sql @@ -0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + sales.salesperson_id AS _u_1 + FROM main.sales AS sales + JOIN main.payments_received AS payments_received + ON payments_received.payment_method = 'cash' + AND payments_received.sale_id = sales._id + GROUP BY + 1 +) +SELECT + salespersons._id AS salesperson_id +FROM main.salespersons AS salespersons +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = salespersons._id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic4_trino.sql b/tests/test_sql_refsols/defog_dealership_basic4_trino.sql new file mode 100644 index 000000000..8d94ed4be 
--- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic4_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + salesperson_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + salespersons._id, + salespersons.first_name, + salespersons.last_name +FROM main.salespersons AS salespersons +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = salespersons._id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic5_trino.sql b/tests/test_sql_refsols/defog_dealership_basic5_trino.sql new file mode 100644 index 000000000..559f3c383 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic5_trino.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + salesperson_id, + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price + FROM main.sales + WHERE + DATE_DIFF('DAY', CAST(sale_date AS TIMESTAMP), CURRENT_TIMESTAMP) <= 30 + GROUP BY + 1 +) +SELECT + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_trino.sql b/tests/test_sql_refsols/defog_dealership_basic6_trino.sql new file mode 100644 index 000000000..e56e33f27 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic6_trino.sql @@ -0,0 +1,12 @@ +SELECT + customers.state, + COUNT(DISTINCT sales.customer_id) AS unique_customers, + COALESCE(SUM(sales.sale_price), 0) AS total_revenue +FROM main.sales AS sales +JOIN main.customers AS customers + ON customers._id = sales.customer_id +GROUP BY + 1 +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_trino.sql b/tests/test_sql_refsols/defog_dealership_basic7_trino.sql new file mode 100644 index 000000000..d5c12bc09 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic7_trino.sql @@ -0,0 +1,10 @@ +SELECT + payment_method, + 
COUNT(*) AS total_payments, + COALESCE(SUM(payment_amount), 0) AS total_amount +FROM main.payments_received +GROUP BY + 1 +ORDER BY + 3 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_trino.sql b/tests/test_sql_refsols/defog_dealership_basic8_trino.sql new file mode 100644 index 000000000..77b0af5de --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic8_trino.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + car_id, + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price + FROM main.sales + GROUP BY + 1 +) +SELECT + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +ORDER BY + 4 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic9_trino.sql b/tests/test_sql_refsols/defog_dealership_basic9_trino.sql new file mode 100644 index 000000000..3edf9a03d --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic9_trino.sql @@ -0,0 +1,9 @@ +SELECT + state, + COUNT(*) AS total_signups +FROM main.customers +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_trino.sql b/tests/test_sql_refsols/defog_dealership_gen1_trino.sql new file mode 100644 index 000000000..53649187f --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen1_trino.sql @@ -0,0 +1,11 @@ +SELECT + first_name, + last_name, + phone, + DATE_DIFF('DAY', CAST(hire_date AS TIMESTAMP), CAST(termination_date AS TIMESTAMP)) AS days_employed +FROM main.salespersons +WHERE + NOT termination_date IS NULL +ORDER BY + 4 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_gen2_trino.sql b/tests/test_sql_refsols/defog_dealership_gen2_trino.sql new file mode 100644 index 000000000..7bae9f619 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen2_trino.sql @@ -0,0 +1,12 @@ +SELECT + COUNT(*) AS weekend_payments +FROM main.payments_made 
+WHERE + ( + ( + ( + DAY_OF_WEEK(payment_date) % 7 + ) + 0 + ) % 7 + ) IN (5, 6) + AND vendor_name = 'Utility Company' diff --git a/tests/test_sql_refsols/defog_dealership_gen3_trino.sql b/tests/test_sql_refsols/defog_dealership_gen3_trino.sql new file mode 100644 index 000000000..e6ac56cd1 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen3_trino.sql @@ -0,0 +1,13 @@ +SELECT + payment_date, + payment_method, + COALESCE(SUM(payment_amount), 0) AS total_amount +FROM main.payments_received +WHERE + DATE_DIFF('WEEK', CAST(payment_date AS TIMESTAMP), CURRENT_TIMESTAMP) = 1 +GROUP BY + 1, + 2 +ORDER BY + 1 DESC, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_gen4_trino.sql b/tests/test_sql_refsols/defog_dealership_gen4_trino.sql new file mode 100644 index 000000000..f9b3c5bb4 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen4_trino.sql @@ -0,0 +1,33 @@ +WITH _s0 AS ( + SELECT + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + customer_id, + SUM(sale_price) AS sum_sale_price + FROM main.sales + WHERE + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 + GROUP BY + 1, + 2 +), _t1 AS ( + SELECT + _s0.quarter, + customers.state, + SUM(_s0.sum_sale_price) AS sum_sum_sale_price + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id + GROUP BY + 1, + 2 +) +SELECT + quarter, + state AS customer_state, + sum_sum_sale_price AS total_sales +FROM _t1 +WHERE + NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_gen5_trino.sql b/tests/test_sql_refsols/defog_dealership_gen5_trino.sql new file mode 100644 index 000000000..6f9acf701 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen5_trino.sql @@ -0,0 +1,20 @@ +WITH _t AS ( + SELECT + car_id, + is_in_inventory, + RANK() OVER (ORDER BY snapshot_date DESC NULLS FIRST) AS _w + FROM main.inventory_snapshots + WHERE + 
MONTH(CAST(snapshot_date AS TIMESTAMP)) = 3 + AND YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 +) +SELECT + cars._id, + cars.make, + cars.model, + cars.year +FROM _t AS _t +JOIN main.cars AS cars + ON _t.car_id = cars._id +WHERE + _t._w = 1 AND _t.is_in_inventory diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv10_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv10_trino.sql new file mode 100644 index 000000000..0333b7eda --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv10_trino.sql @@ -0,0 +1,21 @@ +WITH _s3 AS ( + SELECT + treatments.drug_id, + COUNT(*) AS n_rows + FROM main.treatments AS treatments + JOIN main.adverse_events AS adverse_events + ON DATE_TRUNC('MONTH', CAST(adverse_events.reported_dt AS TIMESTAMP)) = DATE_TRUNC('MONTH', CAST(treatments.start_dt AS TIMESTAMP)) + AND adverse_events.treatment_id = treatments.treatment_id + GROUP BY + 1 +) +SELECT + drugs.drug_id, + drugs.drug_name, + _s3.n_rows AS num_adverse_events +FROM main.drugs AS drugs +JOIN _s3 AS _s3 + ON _s3.drug_id = drugs.drug_id +ORDER BY + 3 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv11_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv11_trino.sql new file mode 100644 index 000000000..d604cd994 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv11_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS num_patients_with_gmail_or_yahoo +FROM main.patients +WHERE + email LIKE '%@gmail.com' OR email LIKE '%@yahoo.com' diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv12_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv12_trino.sql new file mode 100644 index 000000000..9ee9d42c9 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv12_trino.sql @@ -0,0 +1,7 @@ +SELECT + first_name, + last_name, + specialty +FROM main.doctors +WHERE + LOWER(last_name) LIKE '%son%' OR STARTS_WITH(LOWER(first_name), 'j') diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv13_trino.sql 
b/tests/test_sql_refsols/defog_dermtreatment_adv13_trino.sql new file mode 100644 index 000000000..70b3ed58b --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv13_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS PIC_female +FROM main.patients +WHERE + gender = 'Female' AND ins_type = 'private' diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv14_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv14_trino.sql new file mode 100644 index 000000000..3027604f0 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv14_trino.sql @@ -0,0 +1,5 @@ +SELECT + AVG(weight_kg) AS CAW_male +FROM main.patients +WHERE + gender = 'Male' diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv15_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv15_trino.sql new file mode 100644 index 000000000..6ed8c633e --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv15_trino.sql @@ -0,0 +1,30 @@ +WITH _u_0 AS ( + SELECT + drug_id AS _u_1 + FROM main.treatments + WHERE + NOT end_dt IS NULL + GROUP BY + 1 +), _s3 AS ( + SELECT + drug_id, + AVG( + CAST(tot_drug_amt AS DOUBLE) / NULLIF(DATE_DIFF('DAY', CAST(start_dt AS TIMESTAMP), CAST(end_dt AS TIMESTAMP)), 0) + ) AS avg_ddd + FROM main.treatments + WHERE + NOT end_dt IS NULL + GROUP BY + 1 +) +SELECT + drugs.drug_name, + _s3.avg_ddd +FROM main.drugs AS drugs +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = drugs.drug_id +LEFT JOIN _s3 AS _s3 + ON _s3.drug_id = drugs.drug_id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv16_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv16_trino.sql new file mode 100644 index 000000000..b4447e64c --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv16_trino.sql @@ -0,0 +1,9 @@ +SELECT + ( + ( + AVG(day100_pasi_score) - AVG(day7_pasi_score) + ) / NULLIF(AVG(day7_pasi_score), 0) + ) * 100 AS d7d100pir +FROM main.outcomes +WHERE + NOT day100_pasi_score IS NULL AND NOT day7_pasi_score IS NULL 
diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv1_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv1_trino.sql new file mode 100644 index 000000000..218921a0a --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv1_trino.sql @@ -0,0 +1,7 @@ +SELECT DISTINCT + doctors.loc_state AS state +FROM main.treatments AS treatments +JOIN main.drugs AS drugs + ON drugs.drug_id = treatments.drug_id AND drugs.drug_type = 'biologic' +JOIN main.doctors AS doctors + ON doctors.doc_id = treatments.doc_id diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv2_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv2_trino.sql new file mode 100644 index 000000000..eabafe461 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv2_trino.sql @@ -0,0 +1,7 @@ +SELECT + AVG(patients.weight_kg) AS avg_weight +FROM main.treatments AS treatments +JOIN main.drugs AS drugs + ON LOWER(drugs.drug_name) = 'drugalin' AND drugs.drug_id = treatments.drug_id +JOIN main.patients AS patients + ON patients.patient_id = treatments.patient_id diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv3_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv3_trino.sql new file mode 100644 index 000000000..b0ae2d58c --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv3_trino.sql @@ -0,0 +1,10 @@ +SELECT + adverse_events.description, + adverse_events.treatment_id, + drugs.drug_id, + drugs.drug_name +FROM main.adverse_events AS adverse_events +JOIN main.treatments AS treatments + ON adverse_events.treatment_id = treatments.treatment_id +JOIN main.drugs AS drugs + ON drugs.drug_id = treatments.drug_id AND drugs.drug_type = 'topical' diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv4_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv4_trino.sql new file mode 100644 index 000000000..1a238f9cb --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv4_trino.sql @@ -0,0 +1,19 @@ +WITH _u_0 AS ( + SELECT + 
treatments.patient_id AS _u_1 + FROM main.treatments AS treatments + JOIN main.diagnoses AS diagnoses + ON LOWER(diagnoses.diag_name) = 'psoriasis vulgaris' + AND diagnoses.diag_id = treatments.diag_id + JOIN main.drugs AS drugs + ON LOWER(drugs.drug_type) = 'biologic' AND drugs.drug_id = treatments.drug_id + GROUP BY + 1 +) +SELECT + COUNT(*) AS patient_count +FROM main.patients AS patients +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = patients.patient_id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv5_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv5_trino.sql new file mode 100644 index 000000000..f98347181 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv5_trino.sql @@ -0,0 +1,39 @@ +WITH _u_0 AS ( + SELECT + patient_id AS _u_1 + FROM main.treatments + GROUP BY + 1 +), _t1 AS ( + SELECT + MIN(YEAR(CAST(treatments.start_dt AS TIMESTAMP))) AS min_year_start_dt + FROM main.patients AS patients + LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = patients.patient_id + LEFT JOIN main.treatments AS treatments + ON patients.patient_id = treatments.patient_id + WHERE + NOT _u_0._u_1 IS NULL + GROUP BY + patients.patient_id +), _t0 AS ( + SELECT + min_year_start_dt, + COUNT(*) AS n_rows + FROM _t1 + GROUP BY + 1 +) +SELECT + CAST(min_year_start_dt AS VARCHAR) AS year, + n_rows AS number_of_new_patients, + CASE + WHEN ( + n_rows - COALESCE(LAG(n_rows, 1) OVER (ORDER BY min_year_start_dt), n_rows) + ) <> 0 + THEN n_rows - COALESCE(LAG(n_rows, 1) OVER (ORDER BY min_year_start_dt), n_rows) + ELSE NULL + END AS npi +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv6_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv6_trino.sql new file mode 100644 index 000000000..1fc248180 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv6_trino.sql @@ -0,0 +1,16 @@ +WITH _s1 AS ( + SELECT + doc_id, + COUNT(DISTINCT drug_id) AS ndistinct_drug_id + FROM main.treatments 
+ GROUP BY + 1 +) +SELECT + doctors.doc_id, + doctors.specialty, + _s1.ndistinct_drug_id AS num_distinct_drugs, + DENSE_RANK() OVER (PARTITION BY doctors.specialty ORDER BY _s1.ndistinct_drug_id DESC NULLS FIRST) AS SDRSDR +FROM main.doctors AS doctors +JOIN _s1 AS _s1 + ON _s1.doc_id = doctors.doc_id diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv7_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv7_trino.sql new file mode 100644 index 000000000..c2d33a444 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv7_trino.sql @@ -0,0 +1,9 @@ +SELECT + COUNT(*) AS num_treatments +FROM main.treatments AS treatments +JOIN main.patients AS patients + ON LOWER(patients.first_name) = 'alice' + AND patients.patient_id = treatments.patient_id +WHERE + treatments.start_dt < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND treatments.start_dt >= DATE_ADD('MONTH', -6, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv8_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv8_trino.sql new file mode 100644 index 000000000..85bf0b8cc --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv8_trino.sql @@ -0,0 +1,23 @@ +WITH _t0 AS ( + SELECT + DATE_TRUNC('MONTH', CAST(start_dt AS TIMESTAMP)) AS start_month, + COUNT(*) AS n_rows, + COUNT(DISTINCT diag_id) AS ndistinct_diag_id + FROM main.treatments + WHERE + DATE_ADD('MONTH', -12, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) <= DATE_TRUNC('MONTH', CAST(start_dt AS TIMESTAMP)) + AND DATE_TRUNC('MONTH', CAST(start_dt AS TIMESTAMP)) < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + GROUP BY + 1 +) +SELECT + CONCAT_WS( + '-', + YEAR(CAST(start_month AS TIMESTAMP)), + LPAD(MONTH(CAST(start_month AS TIMESTAMP)), 2, '0') + ) AS start_month, + ndistinct_diag_id AS PMPD, + n_rows AS PMTC +FROM _t0 +ORDER BY + 1 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_adv9_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_adv9_trino.sql new file mode 100644 
index 000000000..51ff19e68 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_adv9_trino.sql @@ -0,0 +1,40 @@ +WITH _s2 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(start_dt AS TIMESTAMP)), + LPAD(MONTH(CAST(start_dt AS TIMESTAMP)), 2, '0') + ) AS treatment_month, + COUNT(DISTINCT patient_id) AS ndistinct_patient_id + FROM main.treatments + WHERE + start_dt < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND start_dt >= DATE_ADD('MONTH', -3, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) + GROUP BY + 1 +), _s3 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(treatments.start_dt AS TIMESTAMP)), + LPAD(MONTH(CAST(treatments.start_dt AS TIMESTAMP)), 2, '0') + ) AS treatment_month, + COUNT(DISTINCT treatments.patient_id) AS ndistinct_patient_id + FROM main.treatments AS treatments + JOIN main.drugs AS drugs + ON drugs.drug_id = treatments.drug_id AND drugs.drug_type = 'biologic' + WHERE + treatments.start_dt < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND treatments.start_dt >= DATE_ADD('MONTH', -3, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) + GROUP BY + 1 +) +SELECT + _s2.treatment_month AS month, + _s2.ndistinct_patient_id AS patient_count, + COALESCE(_s3.ndistinct_patient_id, 0) AS biologic_treatment_count +FROM _s2 AS _s2 +LEFT JOIN _s3 AS _s3 + ON _s2.treatment_month = _s3.treatment_month +ORDER BY + 1 DESC diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic10_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic10_trino.sql new file mode 100644 index 000000000..f8775f649 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic10_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + drug_id AS _u_1 + FROM main.treatments + GROUP BY + 1 +) +SELECT + drugs.drug_id, + drugs.drug_name +FROM main.drugs AS drugs +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = drugs.drug_id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic1_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic1_trino.sql new file mode 100644 
index 000000000..6566c1802 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic1_trino.sql @@ -0,0 +1,31 @@ +WITH _s1 AS ( + SELECT + doc_id, + COUNT(*) AS n_rows, + SUM(tot_drug_amt) AS sum_tot_drug_amt + FROM main.treatments + WHERE + start_dt >= DATE_TRUNC('DAY', DATE_ADD('MONTH', -6, CURRENT_TIMESTAMP)) + GROUP BY + 1 +), _t1 AS ( + SELECT + doctors.specialty, + SUM(_s1.n_rows) AS sum_n_rows, + SUM(_s1.sum_tot_drug_amt) AS sum_sum_tot_drug_amt + FROM main.doctors AS doctors + LEFT JOIN _s1 AS _s1 + ON _s1.doc_id = doctors.doc_id + GROUP BY + 1 +) +SELECT + specialty, + sum_n_rows AS num_treatments, + COALESCE(sum_sum_tot_drug_amt, 0) AS total_drug_amount +FROM _t1 +WHERE + sum_n_rows <> 0 +ORDER BY + 3 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic2_trino.sql new file mode 100644 index 000000000..bd00e81c8 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_trino.sql @@ -0,0 +1,79 @@ +WITH _t2 AS ( + SELECT + end_dt, + patient_id, + treatment_id + FROM main.treatments + WHERE + YEAR(CAST(end_dt AS TIMESTAMP)) = 2022 +), _t3 AS ( + SELECT + day100_pasi_score, + treatment_id + FROM main.outcomes + WHERE + NOT day100_pasi_score IS NULL +), _u_0 AS ( + SELECT + treatment_id AS _u_1 + FROM _t3 + GROUP BY + 1 +), _s3 AS ( + SELECT + ins_type, + patient_id + FROM main.patients +), _s10 AS ( + SELECT + _s3.ins_type, + COUNT(DISTINCT _t2.patient_id) AS ndistinct_patient_id + FROM _t2 AS _t2 + LEFT JOIN _u_0 AS _u_0 + ON _t2.treatment_id = _u_0._u_1 + JOIN _s3 AS _s3 + ON _s3.patient_id = _t2.patient_id + WHERE + NOT _u_0._u_1 IS NULL + GROUP BY + 1 +), _u_2 AS ( + SELECT + treatment_id AS _u_3 + FROM _t3 + GROUP BY + 1 +), _s9 AS ( + SELECT + treatment_id, + COUNT(day100_pasi_score) AS count_day100_pasi_score, + SUM(day100_pasi_score) AS sum_day100_pasi_score + FROM main.outcomes + GROUP BY + 1 +), _s11 AS ( + SELECT + 
CAST(SUM(_s9.sum_day100_pasi_score) AS DOUBLE) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + _s7.ins_type + FROM _t2 AS _t6 + LEFT JOIN _u_2 AS _u_2 + ON _t6.treatment_id = _u_2._u_3 + JOIN _s3 AS _s7 + ON _s7.patient_id = _t6.patient_id + JOIN _s9 AS _s9 + ON _s9.treatment_id = _t6.treatment_id + WHERE + NOT _u_2._u_3 IS NULL + GROUP BY + 2 +) +SELECT + _s10.ins_type AS insurance_type, + _s10.ndistinct_patient_id AS num_distinct_patients, + _s11.avg_day100_pasi_score AS avg_pasi_score_day100 +FROM _s10 AS _s10 +LEFT JOIN _s11 AS _s11 + ON _s10.ins_type = _s11.ins_type +ORDER BY + 3 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic3_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic3_trino.sql new file mode 100644 index 000000000..94f3f1f32 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic3_trino.sql @@ -0,0 +1,21 @@ +WITH _s1 AS ( + SELECT + drug_id, + AVG(tot_drug_amt) AS avg_tot_drug_amt, + COUNT(*) AS n_rows + FROM main.treatments + GROUP BY + 1 +) +SELECT + drugs.drug_name, + COALESCE(_s1.n_rows, 0) AS num_treatments, + _s1.avg_tot_drug_amt AS avg_drug_amount +FROM main.drugs AS drugs +LEFT JOIN _s1 AS _s1 + ON _s1.drug_id = drugs.drug_id +ORDER BY + 2 DESC, + 3 DESC, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic4_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic4_trino.sql new file mode 100644 index 000000000..241f6c4c2 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic4_trino.sql @@ -0,0 +1,30 @@ +WITH _t1 AS ( + SELECT + ARBITRARY(treatments.diag_id) AS anything_diag_id, + ARBITRARY(treatments.patient_id) AS anything_patient_id, + MAX(outcomes.day100_itch_vas) AS max_day100_itch_vas + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id + GROUP BY + outcomes.treatment_id +), _s3 AS ( + SELECT + anything_diag_id, + MAX(max_day100_itch_vas) AS 
max_max_day100_itch_vas, + COUNT(DISTINCT anything_patient_id) AS ndistinct_anything_patient_id + FROM _t1 + GROUP BY + 1 +) +SELECT + diagnoses.diag_name AS diagnosis_name, + _s3.ndistinct_anything_patient_id AS num_patients, + _s3.max_max_day100_itch_vas AS max_itch_score +FROM main.diagnoses AS diagnoses +JOIN _s3 AS _s3 + ON _s3.anything_diag_id = diagnoses.diag_id +ORDER BY + 3 DESC, + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic5_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic5_trino.sql new file mode 100644 index 000000000..f48f8fd90 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic5_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + doc_id AS _u_1 + FROM main.treatments + GROUP BY + 1 +) +SELECT + doctors.doc_id, + doctors.first_name, + doctors.last_name +FROM main.doctors AS doctors +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = doctors.doc_id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic6_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic6_trino.sql new file mode 100644 index 000000000..5999efcc3 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic6_trino.sql @@ -0,0 +1,18 @@ +WITH _u_0 AS ( + SELECT + treatments.patient_id AS _u_1 + FROM main.treatments AS treatments + JOIN main.outcomes AS outcomes + ON outcomes.treatment_id = treatments.treatment_id + GROUP BY + 1 +) +SELECT + patients.patient_id, + patients.first_name, + patients.last_name +FROM main.patients AS patients +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = patients.patient_id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic7_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic7_trino.sql new file mode 100644 index 000000000..b080ec2e7 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic7_trino.sql @@ -0,0 +1,10 @@ +SELECT + ins_type AS insurance_type, + AVG(height_cm) AS avg_height, + 
AVG(weight_kg) AS avg_weight +FROM main.patients +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic8_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic8_trino.sql new file mode 100644 index 000000000..04c6f17e8 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic8_trino.sql @@ -0,0 +1,9 @@ +SELECT + specialty, + COUNT(*) AS num_doctors +FROM main.doctors +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic9_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_basic9_trino.sql new file mode 100644 index 000000000..4d0660da7 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_basic9_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + patient_id AS _u_1 + FROM main.treatments + GROUP BY + 1 +) +SELECT + patients.patient_id, + patients.first_name, + patients.last_name +FROM main.patients AS patients +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = patients.patient_id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen1_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_gen1_trino.sql new file mode 100644 index 000000000..c8a2d0547 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_gen1_trino.sql @@ -0,0 +1,13 @@ +SELECT + adverse_events.treatment_id, + treatments.start_dt AS treatment_start_date, + adverse_events.reported_dt AS adverse_event_date, + adverse_events.description +FROM main.adverse_events AS adverse_events +JOIN main.treatments AS treatments + ON DATE_DIFF( + 'DAY', + CAST(treatments.start_dt AS TIMESTAMP), + CAST(adverse_events.reported_dt AS TIMESTAMP) + ) <= 10 + AND adverse_events.treatment_id = treatments.treatment_id diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen2_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_gen2_trino.sql new file mode 100644 index 000000000..83939bf2f --- /dev/null +++ 
b/tests/test_sql_refsols/defog_dermtreatment_gen2_trino.sql @@ -0,0 +1,26 @@ +WITH _t AS ( + SELECT + doc_id, + start_dt, + treatment_id, + ROW_NUMBER() OVER (PARTITION BY doc_id ORDER BY start_dt) AS _w + FROM main.treatments +), _s1 AS ( + SELECT + doc_id, + start_dt, + treatment_id + FROM _t + WHERE + _w = 1 +) +SELECT + doctors.last_name, + doctors.year_reg, + _s1.start_dt AS first_treatment_date, + _s1.treatment_id AS first_treatment_id +FROM main.doctors AS doctors +LEFT JOIN _s1 AS _s1 + ON _s1.doc_id = doctors.doc_id +WHERE + doctors.year_reg = YEAR(DATE_ADD('YEAR', -2, CURRENT_TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen3_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_gen3_trino.sql new file mode 100644 index 000000000..94e9465d9 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_gen3_trino.sql @@ -0,0 +1,5 @@ +SELECT + AVG(DATE_DIFF('YEAR', CAST(date_of_birth AS TIMESTAMP), CURRENT_TIMESTAMP)) AS average_age +FROM main.patients +WHERE + gender = 'Male' AND ins_type = 'private' diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen4_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_gen4_trino.sql new file mode 100644 index 000000000..848da18a5 --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_gen4_trino.sql @@ -0,0 +1,19 @@ +SELECT + treatments_2.treatment_id, + treatments_2.start_dt AS treatment_start_date, + treatments_2.end_dt AS treatment_end_date, + concomitant_meds.start_dt AS concomitant_med_start_date, + concomitant_meds.end_dt AS concomitant_med_end_date +FROM main.treatments AS treatments +JOIN main.concomitant_meds AS concomitant_meds + ON concomitant_meds.treatment_id = treatments.treatment_id +JOIN main.treatments AS treatments_2 + ON DATE_DIFF( + 'DAY', + CAST(treatments_2.start_dt AS TIMESTAMP), + CAST(concomitant_meds.start_dt AS TIMESTAMP) + ) <= 14 + AND concomitant_meds.treatment_id = treatments_2.treatment_id + AND treatments_2.is_placebo +WHERE + 
treatments.is_placebo = TRUE diff --git a/tests/test_sql_refsols/defog_dermtreatment_gen5_trino.sql b/tests/test_sql_refsols/defog_dermtreatment_gen5_trino.sql new file mode 100644 index 000000000..2f40d00cc --- /dev/null +++ b/tests/test_sql_refsols/defog_dermtreatment_gen5_trino.sql @@ -0,0 +1,11 @@ +SELECT + COUNT(*) AS num_treatments +FROM main.treatments AS treatments +JOIN main.diagnoses AS diagnoses + ON LOWER(diagnoses.diag_name) LIKE '%psoriasis%' + AND diagnoses.diag_id = treatments.diag_id +JOIN main.drugs AS drugs + ON NOT drugs.fda_appr_dt IS NULL AND drugs.drug_id = treatments.drug_id +WHERE + NOT treatments.end_dt IS NULL + AND treatments.end_dt >= DATE_TRUNC('DAY', DATE_ADD('MONTH', -6, CURRENT_TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv10_trino.sql new file mode 100644 index 000000000..f3eae3bdb --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv10_trino.sql @@ -0,0 +1,9 @@ +SELECT + wallet_transactions_daily.sender_id AS user_id, + COUNT(*) AS total_transactions +FROM main.users AS users +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON users.uid = wallet_transactions_daily.sender_id + AND wallet_transactions_daily.sender_type = 0 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv11_trino.sql new file mode 100644 index 000000000..5a506f83c --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv11_trino.sql @@ -0,0 +1,18 @@ +SELECT + user_sessions.user_id AS uid, + SUM( + DATE_DIFF( + 'SECOND', + CAST(user_sessions.session_start_ts AS TIMESTAMP), + CAST(user_sessions.session_end_ts AS TIMESTAMP) + ) + ) AS total_duration +FROM main.users AS users +JOIN main.user_sessions AS user_sessions + ON user_sessions.session_end_ts < '2023-06-08' + AND user_sessions.session_start_ts >= '2023-06-01' + AND user_sessions.user_id = users.uid +GROUP BY + 1 +ORDER BY + 2 DESC diff 
--git a/tests/test_sql_refsols/defog_ewallet_adv12_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv12_trino.sql new file mode 100644 index 000000000..e7b28d656 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv12_trino.sql @@ -0,0 +1,10 @@ +SELECT + coupons.cid AS coupon_id, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id +WHERE + coupons.merchant_id = '1' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv13_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv13_trino.sql new file mode 100644 index 000000000..f213b9c22 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv13_trino.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS TUC +FROM main.user_sessions +WHERE + session_end_ts >= DATE_TRUNC('DAY', DATE_ADD('MONTH', -1, CURRENT_TIMESTAMP)) + OR session_start_ts >= DATE_TRUNC('DAY', DATE_ADD('MONTH', -1, CURRENT_TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_adv14_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv14_trino.sql new file mode 100644 index 000000000..f4a110e90 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv14_trino.sql @@ -0,0 +1,5 @@ +SELECT + CAST(COALESCE(SUM(status = 'success'), 0) AS DOUBLE) / NULLIF(COUNT(*), 0) AS _expr0 +FROM main.wallet_transactions_daily +WHERE + DATE_DIFF('MONTH', CAST(created_at AS TIMESTAMP), CURRENT_TIMESTAMP) = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv15_trino.sql new file mode 100644 index 000000000..3e3908483 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv15_trino.sql @@ -0,0 +1,25 @@ +WITH _s3 AS ( + SELECT + coupons.merchant_id, + COUNT(*) AS n_rows + FROM main.coupons AS coupons + JOIN main.merchants AS merchants + ON DATE_DIFF( + 'MONTH', + CAST(merchants.created_at AS TIMESTAMP), + 
CAST(coupons.created_at AS TIMESTAMP) + ) = 0 + AND coupons.merchant_id = merchants.mid + GROUP BY + 1 +) +SELECT + merchants.mid AS merchant_id, + merchants.name AS merchant_name, + COALESCE(_s3.n_rows, 0) AS coupons_per_merchant +FROM main.merchants AS merchants +LEFT JOIN _s3 AS _s3 + ON _s3.merchant_id = merchants.mid +ORDER BY + 3 DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv16_trino.sql new file mode 100644 index 000000000..76c7aa540 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv16_trino.sql @@ -0,0 +1,12 @@ +SELECT + ARBITRARY(users.username) AS username, + COUNT(*) AS total_unread_notifs +FROM main.users AS users +JOIN main.notifications AS notifications + ON notifications.status = 'unread' + AND notifications.type = 'promotion' + AND notifications.user_id = users.uid +WHERE + LOWER(users.country) = 'us' +GROUP BY + notifications.user_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv1_trino.sql new file mode 100644 index 000000000..6e5e7ac3e --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv1_trino.sql @@ -0,0 +1,9 @@ +SELECT + ARBITRARY(merchants.name) AS name, + CAST(COUNT(DISTINCT wallet_transactions_daily.coupon_id) AS DOUBLE) / NULLIF(COUNT(DISTINCT wallet_transactions_daily.txid), 0) AS CPUR +FROM main.merchants AS merchants +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.status = 'success' +GROUP BY + wallet_transactions_daily.receiver_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv2_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv2_trino.sql new file mode 100644 index 000000000..aba388471 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv2_trino.sql @@ -0,0 +1,68 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + ( + 
DAY_OF_WEEK(CAST(notifications.created_at AS TIMESTAMP)) % 7 + ) + 1 + ) + -1 + ) % 7 + ) * -1, + CAST(notifications.created_at AS TIMESTAMP) + ) + ) AS week, + COUNT(*) AS num_notifs, + COALESCE( + SUM( + ( + ( + ( + DAY_OF_WEEK(notifications.created_at) % 7 + ) + 0 + ) % 7 + ) IN (5, 6) + ), + 0 + ) AS weekend_notifs +FROM main.notifications AS notifications +JOIN main.users AS users + ON notifications.user_id = users.uid AND users.country IN ('US', 'CA') +WHERE + notifications.created_at < DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + AND notifications.created_at >= DATE_ADD( + 'WEEK', + -3, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CURRENT_TIMESTAMP) % 7 + ) + 0 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP + ) + ) + ) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv3_trino.sql new file mode 100644 index 000000000..6e53abfb9 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv3_trino.sql @@ -0,0 +1,10 @@ +SELECT + ARBITRARY(merchants.name) AS merchant_name, + COUNT(*) AS total_coupons +FROM main.merchants AS merchants +JOIN main.coupons AS coupons + ON coupons.merchant_id = merchants.mid +WHERE + LOWER(merchants.category) LIKE '%retail%' AND merchants.status = 'active' +GROUP BY + coupons.merchant_id diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv4_trino.sql new file mode 100644 index 000000000..61589266c --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv4_trino.sql @@ -0,0 +1,12 @@ +SELECT + COUNT(*) AS num_transactions, + CASE + WHEN COUNT(*) <> 0 + THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) + ELSE NULL + END AS total_amount +FROM main.wallet_transactions_daily AS wallet_transactions_daily +JOIN main.users AS users + ON users.country = 'US' AND users.uid = 
wallet_transactions_daily.sender_id +WHERE + DATE_DIFF('DAY', CAST(wallet_transactions_daily.created_at AS TIMESTAMP), CURRENT_TIMESTAMP) <= 7 diff --git a/tests/test_sql_refsols/defog_ewallet_adv5_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv5_trino.sql new file mode 100644 index 000000000..d6b5a66ba --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv5_trino.sql @@ -0,0 +1,5 @@ +SELECT + AVG(balance) AS AMB +FROM main.wallet_user_balance_daily +WHERE + DATE_DIFF('DAY', CAST(updated_at AS TIMESTAMP), CURRENT_TIMESTAMP) <= 7 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv6_trino.sql new file mode 100644 index 000000000..2760d7e87 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv6_trino.sql @@ -0,0 +1,13 @@ +WITH _t AS ( + SELECT + balance, + user_id, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC NULLS FIRST) AS _w + FROM main.wallet_user_balance_daily +) +SELECT + user_id, + balance AS latest_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv7_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv7_trino.sql new file mode 100644 index 000000000..35ce27841 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv7_trino.sql @@ -0,0 +1,13 @@ +WITH _t AS ( + SELECT + marketing_opt_in, + user_id, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC NULLS FIRST) AS _w + FROM main.user_setting_snapshot +) +SELECT + users.uid, + _t.marketing_opt_in +FROM main.users AS users +JOIN _t AS _t + ON _t._w = 1 AND _t.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv8_trino.sql new file mode 100644 index 000000000..a1e18feda --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv8_trino.sql @@ -0,0 +1,13 @@ +SELECT + wallet_transactions_daily.receiver_id AS merchants_id, + ARBITRARY(merchants.name) AS merchants_name, + 
ARBITRARY(merchants.category) AS category, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(SUM(wallet_transactions_daily.amount), 0) DESC NULLS FIRST) AS mrr +FROM main.merchants AS merchants +JOIN main.wallet_transactions_daily AS wallet_transactions_daily + ON merchants.mid = wallet_transactions_daily.receiver_id + AND wallet_transactions_daily.receiver_type = 1 + AND wallet_transactions_daily.status = 'success' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv9_trino.sql b/tests/test_sql_refsols/defog_ewallet_adv9_trino.sql new file mode 100644 index 000000000..ef9897b80 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv9_trino.sql @@ -0,0 +1,10 @@ +SELECT + DATE_TRUNC('MONTH', CAST(created_at AS TIMESTAMP)) AS year_month, + COUNT(DISTINCT sender_id) AS active_users +FROM main.wallet_transactions_daily +WHERE + created_at < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP) + AND created_at >= DATE_ADD('MONTH', -2, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP)) + AND sender_type = 0 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic10_trino.sql new file mode 100644 index 000000000..5bb90cebd --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic10_trino.sql @@ -0,0 +1,21 @@ +WITH _s1 AS ( + SELECT + amount, + receiver_id + FROM main.wallet_transactions_daily + WHERE + created_at >= DATE_TRUNC('DAY', DATE_ADD('DAY', -150, CURRENT_TIMESTAMP)) + AND receiver_type = 1 +) +SELECT + ARBITRARY(merchants.name) AS merchant_name, + COUNT(_s1.receiver_id) AS total_transactions, + COALESCE(SUM(_s1.amount), 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +GROUP BY + merchants.mid +ORDER BY + 3 DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic1_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic1_trino.sql new file mode 100644 index 
000000000..5ea6919b3 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic1_trino.sql @@ -0,0 +1,12 @@ +SELECT + DATE_TRUNC('MONTH', CAST(wallet_transactions_daily.created_at AS TIMESTAMP)) AS month, + COUNT(DISTINCT wallet_transactions_daily.sender_id) AS active_users +FROM main.wallet_transactions_daily AS wallet_transactions_daily +JOIN main.users AS users + ON users.status = 'active' AND users.uid = wallet_transactions_daily.sender_id +WHERE + YEAR(CAST(wallet_transactions_daily.created_at AS TIMESTAMP)) = 2023 + AND wallet_transactions_daily.sender_type = 0 + AND wallet_transactions_daily.status = 'success' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic2_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic2_trino.sql new file mode 100644 index 000000000..9ca419d96 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic2_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + merchant_id AS _u_1 + FROM main.coupons + GROUP BY + 1 +) +SELECT + merchants.mid AS merchant_id, + merchants.name AS merchant_name +FROM main.merchants AS merchants +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = merchants.mid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic3_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic3_trino.sql new file mode 100644 index 000000000..80b4b414d --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic3_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + receiver_id AS _u_1 + FROM main.wallet_transactions_daily + WHERE + receiver_type = 1 + GROUP BY + 1 +) +SELECT + merchants.mid AS merchant +FROM main.merchants AS merchants +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = merchants.mid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic4_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic4_trino.sql new file mode 100644 index 000000000..b831e9388 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic4_trino.sql @@ -0,0 +1,16 @@ +WITH 
_u_0 AS ( + SELECT + user_id AS _u_1 + FROM main.notifications + WHERE + type = 'transaction' + GROUP BY + 1 +) +SELECT + users.uid AS user_id +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic5_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic5_trino.sql new file mode 100644 index 000000000..b1a33dd01 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic5_trino.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + user_id AS _u_1 + FROM main.notifications + GROUP BY + 1 +) +SELECT + users.uid, + users.username +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic6_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic6_trino.sql new file mode 100644 index 000000000..a6f22926c --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic6_trino.sql @@ -0,0 +1,9 @@ +SELECT + device_type, + COUNT(*) AS count +FROM main.user_sessions +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic7_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic7_trino.sql new file mode 100644 index 000000000..ed2752247 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic7_trino.sql @@ -0,0 +1,9 @@ +SELECT + status, + COUNT(*) AS count +FROM main.wallet_transactions_daily +GROUP BY + 1 +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic8_trino.sql new file mode 100644 index 000000000..a7189744c --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic8_trino.sql @@ -0,0 +1,12 @@ +SELECT + ARBITRARY(coupons.code) AS coupon_code, + COUNT(wallet_transactions_daily.txid) AS redemption_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN 
main.wallet_transactions_daily AS wallet_transactions_daily + ON coupons.cid = wallet_transactions_daily.coupon_id +GROUP BY + coupons.cid +ORDER BY + 2 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_trino.sql b/tests/test_sql_refsols/defog_ewallet_basic9_trino.sql new file mode 100644 index 000000000..64fef3152 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic9_trino.sql @@ -0,0 +1,14 @@ +SELECT + users.country, + COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount +FROM main.wallet_transactions_daily AS wallet_transactions_daily +LEFT JOIN main.users AS users + ON users.uid = wallet_transactions_daily.sender_id +WHERE + wallet_transactions_daily.sender_type = 0 +GROUP BY + 1 +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_gen1_trino.sql b/tests/test_sql_refsols/defog_ewallet_gen1_trino.sql new file mode 100644 index 000000000..fb40453a6 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen1_trino.sql @@ -0,0 +1,26 @@ +WITH _t0 AS ( + SELECT + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY wallet_merchant_balance_daily.balance DESC) - 1.0 + ) - ( + CAST(( + COUNT(wallet_merchant_balance_daily.balance) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN wallet_merchant_balance_daily.balance + ELSE NULL + END AS expr_1 + FROM main.wallet_merchant_balance_daily AS wallet_merchant_balance_daily + JOIN main.merchants AS merchants + ON LOWER(merchants.category) LIKE '%retail%' + AND merchants.mid = wallet_merchant_balance_daily.merchant_id + AND merchants.status = 'active' + WHERE + DATE_TRUNC('DAY', CAST(wallet_merchant_balance_daily.updated_at AS TIMESTAMP)) = DATE_TRUNC('DAY', CURRENT_TIMESTAMP) +) +SELECT + AVG(expr_1) AS _expr0 +FROM _t0 diff --git a/tests/test_sql_refsols/defog_ewallet_gen2_trino.sql b/tests/test_sql_refsols/defog_ewallet_gen2_trino.sql new file mode 100644 index 000000000..5bea5fe47 
--- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen2_trino.sql @@ -0,0 +1,23 @@ +WITH _s0 AS ( + SELECT + MIN(snapshot_date) AS min_snapshot_date + FROM main.user_setting_snapshot + WHERE + YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT + snapshot_date, + AVG(tx_limit_daily) AS avg_tx_limit_daily, + AVG(tx_limit_monthly) AS avg_tx_limit_monthly + FROM main.user_setting_snapshot + WHERE + YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 + GROUP BY + 1 +) +SELECT + _s1.avg_tx_limit_daily AS avg_daily_limit, + _s1.avg_tx_limit_monthly AS avg_monthly_limit +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.min_snapshot_date = _s1.snapshot_date diff --git a/tests/test_sql_refsols/defog_ewallet_gen3_trino.sql b/tests/test_sql_refsols/defog_ewallet_gen3_trino.sql new file mode 100644 index 000000000..5f45bdefc --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen3_trino.sql @@ -0,0 +1,8 @@ +SELECT + device_type, + AVG( + DATE_DIFF('SECOND', CAST(session_start_ts AS TIMESTAMP), CAST(session_end_ts AS TIMESTAMP)) + ) AS avg_session_duration_seconds +FROM main.user_sessions +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_trino.sql b/tests/test_sql_refsols/defog_ewallet_gen4_trino.sql new file mode 100644 index 000000000..fdf246601 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen4_trino.sql @@ -0,0 +1,36 @@ +WITH _t1 AS ( + SELECT + merchant_id, + start_date + FROM main.coupons +), _s1 AS ( + SELECT + merchant_id, + MIN(start_date) AS min_start_date + FROM _t1 + GROUP BY + 1 +), _s4 AS ( + SELECT + merchants.mid, + _s1.min_start_date, + ARBITRARY(merchants.created_at) AS anything_created_at, + MAX(coupons.cid) AS max_cid + FROM main.merchants AS merchants + LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid + LEFT JOIN main.coupons AS coupons + ON _s1.min_start_date = coupons.start_date AND coupons.merchant_id = merchants.mid + GROUP BY + 1, + 2 +) +SELECT + _s4.mid AS merchants_id, + 
_s4.anything_created_at AS merchant_registration_date, + _s4.min_start_date AS earliest_coupon_start_date, + _s4.max_cid AS earliest_coupon_id +FROM _s4 AS _s4 +JOIN _t1 AS _s5 + ON _s4.mid = _s5.merchant_id + AND _s5.start_date <= DATE_ADD('YEAR', 1, CAST(_s4.anything_created_at AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_gen5_trino.sql b/tests/test_sql_refsols/defog_ewallet_gen5_trino.sql new file mode 100644 index 000000000..7650af022 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen5_trino.sql @@ -0,0 +1,20 @@ +WITH _u_0 AS ( + SELECT + notifications.user_id AS _u_1 + FROM main.notifications AS notifications + JOIN main.users AS users + ON notifications.created_at <= DATE_ADD('YEAR', 1, CAST(users.created_at AS TIMESTAMP)) + AND notifications.created_at >= users.created_at + AND notifications.user_id = users.uid + GROUP BY + 1 +) +SELECT + users.username, + users.email, + users.created_at +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_restaurants_gen10_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen10_trino.sql new file mode 100644 index 000000000..8501b302a --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen10_trino.sql @@ -0,0 +1,7 @@ +SELECT + name, + rating +FROM main.restaurant +ORDER BY + 2 DESC, + 1 DESC diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen11_trino.sql new file mode 100644 index 000000000..27341d0bf --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen11_trino.sql @@ -0,0 +1,3 @@ +SELECT + CAST(SUM(rating > 4.5) AS DOUBLE) / NULLIF(COUNT(*), 0) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen12_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen12_trino.sql new file mode 100644 index 000000000..10f5c0a80 --- /dev/null +++ 
b/tests/test_sql_refsols/defog_restaurants_gen12_trino.sql @@ -0,0 +1,3 @@ +SELECT + CAST(COALESCE(SUM(rating > 4.0), 0) AS DOUBLE) / NULLIF(SUM(rating < 4.0), 0) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen13_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen13_trino.sql new file mode 100644 index 000000000..422ca8ee8 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen13_trino.sql @@ -0,0 +1,5 @@ +SELECT + CAST(COALESCE(SUM(rating > 4.0), 0) AS DOUBLE) / NULLIF(SUM(rating < 4.0), 0) AS ratio +FROM main.restaurant +WHERE + LOWER(city_name) = 'new york' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen14_trino.sql new file mode 100644 index 000000000..07ec5db04 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen14_trino.sql @@ -0,0 +1,8 @@ +SELECT + CAST(SUM(LOWER(food_type) = 'vegan') AS DOUBLE) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio +FROM main.restaurant +WHERE + LOWER(city_name) = 'san francisco' + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen15_trino.sql new file mode 100644 index 000000000..46e24e888 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen15_trino.sql @@ -0,0 +1,5 @@ +SELECT + CAST(SUM(LOWER(food_type) = 'italian') AS DOUBLE) / NULLIF(COUNT(*), 0) AS ratio +FROM main.restaurant +WHERE + LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen16_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen16_trino.sql new file mode 100644 index 000000000..09d3455d4 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen16_trino.sql @@ -0,0 +1,17 @@ +WITH _t0 AS ( + SELECT + city_name, + name, + COUNT(*) AS n_rows + FROM main.restaurant + GROUP BY + 1, + 2 +) +SELECT + city_name, + name, + n_rows AS 
n_restaurants +FROM _t0 +WHERE + n_rows > 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen17_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen17_trino.sql new file mode 100644 index 000000000..7a8c02f1d --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen17_trino.sql @@ -0,0 +1,8 @@ +SELECT + city_name, + AVG(rating) AS avg_rating +FROM main.restaurant +WHERE + LOWER(food_type) = 'mexican' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen18_trino.sql new file mode 100644 index 000000000..95b9160e2 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen18_trino.sql @@ -0,0 +1,59 @@ +WITH _s0 AS ( + SELECT + city_name, + region + FROM main.geographic +), _s1 AS ( + SELECT + city_name + FROM main.restaurant +), _u_0 AS ( + SELECT + city_name AS _u_1 + FROM _s1 + GROUP BY + 1 +), _s6 AS ( + SELECT DISTINCT + _s0.region + FROM _s0 AS _s0 + LEFT JOIN _u_0 AS _u_0 + ON _s0.city_name = _u_0._u_1 + WHERE + NOT _u_0._u_1 IS NULL +), _u_2 AS ( + SELECT + city_name AS _u_3 + FROM _s1 + GROUP BY + 1 +), _s5 AS ( + SELECT + city_name, + COUNT(rating) AS count_rating, + SUM(rating) AS sum_rating + FROM main.restaurant + GROUP BY + 1 +), _s7 AS ( + SELECT + CAST(SUM(_s5.sum_rating) AS DOUBLE) / SUM(_s5.count_rating) AS avg_rating, + _s2.region + FROM _s0 AS _s2 + LEFT JOIN _u_2 AS _u_2 + ON _s2.city_name = _u_2._u_3 + JOIN _s5 AS _s5 + ON _s2.city_name = _s5.city_name + WHERE + NOT _u_2._u_3 IS NULL + GROUP BY + 2 +) +SELECT + _s6.region AS rest_region, + _s7.avg_rating +FROM _s6 AS _s6 +LEFT JOIN _s7 AS _s7 + ON _s6.region = _s7.region +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_restaurants_gen19_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen19_trino.sql new file mode 100644 index 000000000..be12b71b7 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen19_trino.sql @@ -0,0 +1,21 @@ +WITH _s0 AS ( + SELECT + 
city_name, + COUNT(*) AS n_rows + FROM main.restaurant + WHERE + LOWER(food_type) = 'italian' + GROUP BY + 1 +) +SELECT + geographic.region AS rest_region, + SUM(_s0.n_rows) AS n_restaurants +FROM _s0 AS _s0 +JOIN main.geographic AS geographic + ON _s0.city_name = geographic.city_name +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_restaurants_gen1_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen1_trino.sql new file mode 100644 index 000000000..eb6929934 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen1_trino.sql @@ -0,0 +1,6 @@ +SELECT + food_type, + COUNT(*) AS restaurants +FROM main.restaurant +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen20_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen20_trino.sql new file mode 100644 index 000000000..55fa0886c --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen20_trino.sql @@ -0,0 +1,19 @@ +WITH _s0 AS ( + SELECT + city_name, + COUNT(*) AS n_rows + FROM main.restaurant + GROUP BY + 1 +) +SELECT + geographic.region AS rest_region, + SUM(_s0.n_rows) AS n_restaurants +FROM _s0 AS _s0 +JOIN main.geographic AS geographic + ON _s0.city_name = geographic.city_name +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_restaurants_gen21_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen21_trino.sql new file mode 100644 index 000000000..6651009f8 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen21_trino.sql @@ -0,0 +1,6 @@ +SELECT + city_name +FROM main.restaurant +ORDER BY + rating DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen22_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen22_trino.sql new file mode 100644 index 000000000..4de0f8792 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen22_trino.sql @@ -0,0 +1,6 @@ +SELECT + name, + rating +FROM main.restaurant +WHERE + LOWER(city_name) = 'new york' AND rating > 4 
diff --git a/tests/test_sql_refsols/defog_restaurants_gen23_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen23_trino.sql new file mode 100644 index 000000000..78a287d66 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen23_trino.sql @@ -0,0 +1,9 @@ +SELECT + restaurant.name, + restaurant.food_type +FROM main.location AS location +JOIN main.restaurant AS restaurant + ON location.restaurant_id = restaurant.id +WHERE + LOWER(location.city_name) = 'san francisco' + AND LOWER(location.street_name) = 'market st' diff --git a/tests/test_sql_refsols/defog_restaurants_gen24_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen24_trino.sql new file mode 100644 index 000000000..de54aae26 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen24_trino.sql @@ -0,0 +1,5 @@ +SELECT + name +FROM main.restaurant +WHERE + LOWER(food_type) = 'italian' diff --git a/tests/test_sql_refsols/defog_restaurants_gen25_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen25_trino.sql new file mode 100644 index 000000000..a7373300c --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen25_trino.sql @@ -0,0 +1,5 @@ +SELECT + name +FROM main.restaurant +WHERE + LOWER(city_name) = 'los angeles' AND rating > 4 diff --git a/tests/test_sql_refsols/defog_restaurants_gen2_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen2_trino.sql new file mode 100644 index 000000000..462da428a --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen2_trino.sql @@ -0,0 +1,6 @@ +SELECT + city_name, + COUNT(*) AS total_count +FROM main.location +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen3_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen3_trino.sql new file mode 100644 index 000000000..9b8f8595c --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen3_trino.sql @@ -0,0 +1,9 @@ +SELECT + food_type, + AVG(rating) AS avg_rating +FROM main.restaurant +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 DESC diff --git 
a/tests/test_sql_refsols/defog_restaurants_gen4_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen4_trino.sql new file mode 100644 index 000000000..9ab4be5a0 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen4_trino.sql @@ -0,0 +1,11 @@ +SELECT + city_name, + COUNT(*) AS num_restaurants +FROM main.restaurant +WHERE + LOWER(food_type) = 'italian' +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 DESC diff --git a/tests/test_sql_refsols/defog_restaurants_gen5_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen5_trino.sql new file mode 100644 index 000000000..f0a1066a5 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen5_trino.sql @@ -0,0 +1,9 @@ +SELECT + city_name, + COUNT(*) AS num_restaurants +FROM main.location +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 DESC diff --git a/tests/test_sql_refsols/defog_restaurants_gen6_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen6_trino.sql new file mode 100644 index 000000000..508db2eec --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen6_trino.sql @@ -0,0 +1,8 @@ +SELECT + street_name +FROM main.location +GROUP BY + 1 +ORDER BY + COUNT(*) DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen7_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen7_trino.sql new file mode 100644 index 000000000..009763d1e --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen7_trino.sql @@ -0,0 +1,5 @@ +SELECT + name +FROM main.restaurant +WHERE + LOWER(city_name) = 'new york' OR LOWER(food_type) = 'italian' diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen8_trino.sql new file mode 100644 index 000000000..1dae46b96 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen8_trino.sql @@ -0,0 +1,33 @@ +WITH _s1 AS ( + SELECT + city_name, + region + FROM main.geographic +), _s6 AS ( + SELECT DISTINCT + _s1.region + FROM main.location AS location + LEFT JOIN _s1 AS _s1 + ON _s1.city_name = 
location.city_name +), _s7 AS ( + SELECT + _s3.region, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, + SUM(restaurant.rating) AS sum_rating + FROM main.location AS location + LEFT JOIN _s1 AS _s3 + ON _s3.city_name = location.city_name + JOIN main.restaurant AS restaurant + ON location.restaurant_id = restaurant.id + GROUP BY + 1 +) +SELECT + _s6.region AS region_name, + CAST(_s7.sum_rating AS DOUBLE) / _s7.sum_expr AS avg_rating +FROM _s6 AS _s6 +JOIN _s7 AS _s7 + ON _s6.region = _s7.region +ORDER BY + 1 NULLS FIRST, + 2 DESC diff --git a/tests/test_sql_refsols/defog_restaurants_gen9_trino.sql b/tests/test_sql_refsols/defog_restaurants_gen9_trino.sql new file mode 100644 index 000000000..33f54de55 --- /dev/null +++ b/tests/test_sql_refsols/defog_restaurants_gen9_trino.sql @@ -0,0 +1,7 @@ +SELECT + name +FROM main.restaurant +ORDER BY + rating DESC, + 1 DESC +LIMIT 3 diff --git a/tests/test_sql_refsols/division_by_zero_database_trino.sql b/tests/test_sql_refsols/division_by_zero_database_trino.sql new file mode 100644 index 000000000..6c01e76c6 --- /dev/null +++ b/tests/test_sql_refsols/division_by_zero_database_trino.sql @@ -0,0 +1,6 @@ +SELECT + CAST(l_extendedprice AS DOUBLE) / l_discount AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_by_zero_null_trino.sql b/tests/test_sql_refsols/division_by_zero_null_trino.sql new file mode 100644 index 000000000..bd411beb4 --- /dev/null +++ b/tests/test_sql_refsols/division_by_zero_null_trino.sql @@ -0,0 +1,6 @@ +SELECT + CAST(l_extendedprice AS DOUBLE) / NULLIF(l_discount, 0) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_by_zero_zero_trino.sql b/tests/test_sql_refsols/division_by_zero_zero_trino.sql new file mode 100644 index 000000000..a9e49779f --- /dev/null +++ b/tests/test_sql_refsols/division_by_zero_zero_trino.sql @@ -0,0 +1,6 @@ +SELECT + IF(l_discount = 0, 0, 
CAST(l_extendedprice AS DOUBLE) / l_discount) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_complex_operands_null_trino.sql b/tests/test_sql_refsols/division_complex_operands_null_trino.sql new file mode 100644 index 000000000..e7f8e4df8 --- /dev/null +++ b/tests/test_sql_refsols/division_complex_operands_null_trino.sql @@ -0,0 +1,10 @@ +SELECT + CAST(( + l_extendedprice + l_tax + ) AS DOUBLE) / CASE WHEN ( + l_discount * 2 + ) <> 0 THEN l_discount * 2 ELSE NULL END AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_complex_operands_zero_trino.sql b/tests/test_sql_refsols/division_complex_operands_zero_trino.sql new file mode 100644 index 000000000..247d8c6e4 --- /dev/null +++ b/tests/test_sql_refsols/division_complex_operands_zero_trino.sql @@ -0,0 +1,16 @@ +SELECT + IF( + ( + l_discount * 2 + ) = 0, + 0, + CAST(( + l_extendedprice + l_tax + ) AS DOUBLE) / ( + l_discount * 2 + ) + ) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_iff_false_branch_null_trino.sql b/tests/test_sql_refsols/division_iff_false_branch_null_trino.sql new file mode 100644 index 000000000..77e375891 --- /dev/null +++ b/tests/test_sql_refsols/division_iff_false_branch_null_trino.sql @@ -0,0 +1,6 @@ +SELECT + CAST(l_extendedprice AS DOUBLE) / NULLIF(IF(l_discount > 0, 1, l_discount), 0) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_iff_false_branch_zero_trino.sql b/tests/test_sql_refsols/division_iff_false_branch_zero_trino.sql new file mode 100644 index 000000000..d8fad0aaa --- /dev/null +++ b/tests/test_sql_refsols/division_iff_false_branch_zero_trino.sql @@ -0,0 +1,10 @@ +SELECT + IF( + IF(l_discount > 0, 1, l_discount) = 0, + 0, + CAST(l_extendedprice AS DOUBLE) / IF(l_discount > 0, 1, l_discount) + ) AS 
computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_iff_true_branch_null_trino.sql b/tests/test_sql_refsols/division_iff_true_branch_null_trino.sql new file mode 100644 index 000000000..eaf00fe91 --- /dev/null +++ b/tests/test_sql_refsols/division_iff_true_branch_null_trino.sql @@ -0,0 +1,6 @@ +SELECT + CAST(l_extendedprice AS DOUBLE) / NULLIF(IF(l_discount > 0, l_discount, 1), 0) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_iff_true_branch_zero_trino.sql b/tests/test_sql_refsols/division_iff_true_branch_zero_trino.sql new file mode 100644 index 000000000..7ce40af16 --- /dev/null +++ b/tests/test_sql_refsols/division_iff_true_branch_zero_trino.sql @@ -0,0 +1,10 @@ +SELECT + IF( + IF(l_discount > 0, l_discount, 1) = 0, + 0, + CAST(l_extendedprice AS DOUBLE) / IF(l_discount > 0, l_discount, 1) + ) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_keep_if_denom_null_trino.sql b/tests/test_sql_refsols/division_keep_if_denom_null_trino.sql new file mode 100644 index 000000000..1228ca574 --- /dev/null +++ b/tests/test_sql_refsols/division_keep_if_denom_null_trino.sql @@ -0,0 +1,6 @@ +SELECT + CAST(l_extendedprice AS DOUBLE) / NULLIF(CASE WHEN l_discount > 0.05 THEN l_discount ELSE NULL END, 0) AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_keep_if_denom_zero_trino.sql b/tests/test_sql_refsols/division_keep_if_denom_zero_trino.sql new file mode 100644 index 000000000..eb45456aa --- /dev/null +++ b/tests/test_sql_refsols/division_keep_if_denom_zero_trino.sql @@ -0,0 +1,10 @@ +SELECT + IF( + CASE WHEN l_discount > 0.05 THEN l_discount ELSE NULL END = 0, + 0, + CAST(l_extendedprice AS DOUBLE) / CASE WHEN l_discount > 0.05 THEN l_discount ELSE NULL END + ) AS computed_value +FROM tpch.lineitem +ORDER BY + 
1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_multiple_ops_null_trino.sql b/tests/test_sql_refsols/division_multiple_ops_null_trino.sql new file mode 100644 index 000000000..f103dffd0 --- /dev/null +++ b/tests/test_sql_refsols/division_multiple_ops_null_trino.sql @@ -0,0 +1,8 @@ +SELECT + l_quantity * ( + CAST(l_extendedprice AS DOUBLE) / NULLIF(l_discount, 0) + ) + l_tax AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/division_multiple_ops_zero_trino.sql b/tests/test_sql_refsols/division_multiple_ops_zero_trino.sql new file mode 100644 index 000000000..758efd257 --- /dev/null +++ b/tests/test_sql_refsols/division_multiple_ops_zero_trino.sql @@ -0,0 +1,6 @@ +SELECT + l_quantity * IF(l_discount = 0, 0, CAST(l_extendedprice AS DOUBLE) / l_discount) + l_tax AS computed_value +FROM tpch.lineitem +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/double_cross_trino.sql b/tests/test_sql_refsols/double_cross_trino.sql new file mode 100644 index 000000000..5be995538 --- /dev/null +++ b/tests/test_sql_refsols/double_cross_trino.sql @@ -0,0 +1,72 @@ +WITH _t3 AS ( + SELECT + o_orderdate + FROM tpch.orders +), _s0 AS ( + SELECT + MIN(o_orderdate) AS min_o_orderdate + FROM _t3 +), _s2 AS ( + SELECT + DATE_DIFF( + 'WEEK', + CAST(_s0.min_o_orderdate AS TIMESTAMP), + CAST(orders.o_orderdate AS TIMESTAMP) + ) AS ord_wk, + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN tpch.orders AS orders + ON DATE_DIFF( + 'WEEK', + CAST(_s0.min_o_orderdate AS TIMESTAMP), + CAST(orders.o_orderdate AS TIMESTAMP) + ) < 10 + AND orders.o_orderpriority = '1-URGENT' + AND orders.o_orderstatus = 'F' + GROUP BY + 1 +), _s3 AS ( + SELECT + MIN(o_orderdate) AS min_o_orderdate + FROM _t3 +), _t0 AS ( + SELECT + DATE_DIFF( + 'WEEK', + CAST(_s3.min_o_orderdate AS TIMESTAMP), + CAST(lineitem.l_receiptdate AS TIMESTAMP) + ) AS line_wk, + _s2.ord_wk, + ARBITRARY(_s2.n_rows) AS anything_n_rows, + COUNT(*) AS 
n_rows + FROM _s2 AS _s2 + CROSS JOIN _s3 AS _s3 + JOIN tpch.lineitem AS lineitem + ON DATE_DIFF( + 'WEEK', + CAST(_s3.min_o_orderdate AS TIMESTAMP), + CAST(lineitem.l_receiptdate AS TIMESTAMP) + ) < 10 + AND YEAR(CAST(lineitem.l_receiptdate AS TIMESTAMP)) = 1992 + AND _s2.ord_wk = DATE_DIFF( + 'WEEK', + CAST(_s3.min_o_orderdate AS TIMESTAMP), + CAST(lineitem.l_receiptdate AS TIMESTAMP) + ) + AND lineitem.l_returnflag = 'R' + AND lineitem.l_shipmode = 'RAIL' + GROUP BY + 1, + 2 +) +SELECT + ord_wk AS wk, + n_rows AS n_lines, + anything_n_rows AS n_orders, + ROUND( + CAST(SUM(n_rows) OVER (ORDER BY line_wk ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE) / SUM(anything_n_rows) OVER (ORDER BY ord_wk ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 4 + ) AS lpo +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/double_partition_trino.sql b/tests/test_sql_refsols/double_partition_trino.sql new file mode 100644 index 000000000..58235137d --- /dev/null +++ b/tests/test_sql_refsols/double_partition_trino.sql @@ -0,0 +1,15 @@ +WITH _t0 AS ( + SELECT + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1, + MONTH(CAST(o_orderdate AS TIMESTAMP)) +) +SELECT + year_o_orderdate AS year, + MAX(n_rows) AS best_month +FROM _t0 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/dumb_aggregation_trino.sql b/tests/test_sql_refsols/dumb_aggregation_trino.sql new file mode 100644 index 000000000..a70bfcbe0 --- /dev/null +++ b/tests/test_sql_refsols/dumb_aggregation_trino.sql @@ -0,0 +1,46 @@ +WITH _s0 AS ( + SELECT + n_name, + n_regionkey + FROM tpch.nation + ORDER BY + 1 NULLS FIRST + LIMIT 2 +), _s1 AS ( + SELECT + r_name, + r_regionkey, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY r_regionkey ORDER BY r_regionkey DESC) - 1.0 + ) - ( + CAST(( + COUNT(r_regionkey) OVER (PARTITION BY r_regionkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN r_regionkey + ELSE NULL + END AS 
avg_expr + FROM tpch.region +) +SELECT + _s0.n_name AS nation_name, + _s1.r_name AS a1, + _s1.r_name AS a2, + _s1.r_regionkey AS a3, + IF( + NOT CASE WHEN _s1.r_name <> 'AMERICA' THEN _s1.r_regionkey ELSE NULL END IS NULL, + 1, + 0 + ) AS a4, + 1 AS a5, + _s1.r_regionkey AS a6, + _s1.r_name AS a7, + _s1.avg_expr AS a8 +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON _s0.n_regionkey = _s1.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_culture_events_info_trino.sql b/tests/test_sql_refsols/epoch_culture_events_info_trino.sql new file mode 100644 index 000000000..7db29730d --- /dev/null +++ b/tests/test_sql_refsols/epoch_culture_events_info_trino.sql @@ -0,0 +1,32 @@ +WITH _s2 AS ( + SELECT + ev_dt, + ev_key + FROM events +) +SELECT + events.ev_name AS event_name, + eras.er_name AS era_name, + YEAR(CAST(events.ev_dt AS TIMESTAMP)) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > HOUR(CAST(_s6.ev_dt AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(_s6.ev_dt AS TIMESTAMP)) +WHERE + events.ev_typ = 'culture' +ORDER BY + events.ev_dt NULLS FIRST +LIMIT 6 diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_trino.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_trino.sql new file mode 100644 index 000000000..a556e35fe --- /dev/null +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_trino.sql @@ -0,0 +1,27 @@ +WITH _t2 AS ( + SELECT + eras.er_end_year, + eras.er_name, + eras.er_start_year, + events.ev_dt, 
+ DATE_DIFF( + 'DAY', + CAST(LAG(events.ev_dt, 1) OVER (PARTITION BY eras.er_name, eras.er_name ORDER BY events.ev_dt) AS TIMESTAMP), + CAST(events.ev_dt AS TIMESTAMP) + ) AS day_gap + FROM eras AS eras + JOIN events AS events + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) +) +SELECT + er_name AS era_name, + AVG(day_gap) AS avg_event_gap +FROM _t2 +WHERE + er_end_year > YEAR(CAST(ev_dt AS TIMESTAMP)) + AND er_start_year <= YEAR(CAST(ev_dt AS TIMESTAMP)) +GROUP BY + 1 +ORDER BY + ARBITRARY(er_start_year) NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_events_per_season_trino.sql b/tests/test_sql_refsols/epoch_events_per_season_trino.sql new file mode 100644 index 000000000..102cd9a08 --- /dev/null +++ b/tests/test_sql_refsols/epoch_events_per_season_trino.sql @@ -0,0 +1,13 @@ +SELECT + seasons.s_name AS season_name, + COUNT(*) AS n_events +FROM seasons AS seasons +JOIN events AS events + ON seasons.s_month1 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_first_event_per_era_trino.sql b/tests/test_sql_refsols/epoch_first_event_per_era_trino.sql new file mode 100644 index 000000000..2a64d4e66 --- /dev/null +++ b/tests/test_sql_refsols/epoch_first_event_per_era_trino.sql @@ -0,0 +1,19 @@ +WITH _t AS ( + SELECT + eras.er_name, + eras.er_start_year, + events.ev_name, + ROW_NUMBER() OVER (PARTITION BY eras.er_name ORDER BY events.ev_dt) AS _w + FROM eras AS eras + JOIN events AS events + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) +) +SELECT + er_name AS era_name, + ev_name AS event_name +FROM _t +WHERE + _w = 1 +ORDER BY + er_start_year NULLS FIRST diff --git 
a/tests/test_sql_refsols/epoch_intra_season_searches_trino.sql b/tests/test_sql_refsols/epoch_intra_season_searches_trino.sql new file mode 100644 index 000000000..d88996fbf --- /dev/null +++ b/tests/test_sql_refsols/epoch_intra_season_searches_trino.sql @@ -0,0 +1,86 @@ +WITH _s0 AS ( + SELECT + s_month1, + s_month2, + s_month3, + s_name + FROM seasons +), _s5 AS ( + SELECT + ev_dt, + ev_name + FROM events +), _s9 AS ( + SELECT + _s2.s_name, + searches.search_id + FROM _s0 AS _s2 + JOIN searches AS searches + ON _s2.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s2.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s2.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + JOIN _s5 AS _s5 + ON LOWER(searches.search_string) LIKE CONCAT('%', LOWER(_s5.ev_name), '%') + JOIN _s0 AS _s7 + ON _s2.s_name = _s7.s_name + AND ( + _s7.s_month1 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + OR _s7.s_month2 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + OR _s7.s_month3 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + ) +), _t1 AS ( + SELECT + _s0.s_name, + COUNT(_s9.search_id) AS count_search_id + FROM _s0 AS _s0 + JOIN searches AS searches + ON _s0.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s0.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s0.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + LEFT JOIN _s9 AS _s9 + ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + searches.search_id, + 1 +), _s16 AS ( + SELECT + s_name, + COUNT(*) AS n_rows, + SUM(( + NOT NULLIF(count_search_id, 0) IS NULL AND NULLIF(count_search_id, 0) > 0 + )) AS sum_is_intra_season + FROM _t1 + GROUP BY + 1 +), _s17 AS ( + SELECT + _s10.s_name, + COUNT(*) AS n_rows, + SUM(_s15.s_name = _s10.s_name) AS sum_is_intra_season + FROM _s0 AS _s10 + JOIN _s5 AS _s11 + ON _s10.s_month1 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + OR _s10.s_month2 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + OR _s10.s_month3 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + 
JOIN searches AS searches + ON LOWER(searches.search_string) LIKE CONCAT('%', LOWER(_s11.ev_name), '%') + JOIN _s0 AS _s15 + ON _s15.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s15.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s15.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 1 +) +SELECT + _s16.s_name AS season_name, + ROUND(( + 100.0 * COALESCE(_s16.sum_is_intra_season, 0) + ) / _s16.n_rows, 2) AS pct_season_searches, + ROUND(( + 100.0 * COALESCE(_s17.sum_is_intra_season, 0) + ) / COALESCE(_s17.n_rows, 0), 2) AS pct_event_searches +FROM _s16 AS _s16 +LEFT JOIN _s17 AS _s17 + ON _s16.s_name = _s17.s_name +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_trino.sql b/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_trino.sql new file mode 100644 index 000000000..9046a34ee --- /dev/null +++ b/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_trino.sql @@ -0,0 +1,29 @@ +WITH _t2 AS ( + SELECT + searches.search_engine, + times.t_name, + COUNT(*) AS n_rows + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 1, + 2 +), _t AS ( + SELECT + search_engine, + t_name, + n_rows, + ROW_NUMBER() OVER (PARTITION BY t_name ORDER BY n_rows DESC NULLS FIRST, search_engine) AS _w + FROM _t2 +) +SELECT + t_name AS tod, + search_engine, + n_rows AS n_searches +FROM _t +WHERE + _w = 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_most_popular_topic_per_region_trino.sql b/tests/test_sql_refsols/epoch_most_popular_topic_per_region_trino.sql new file mode 100644 index 000000000..7dd64b819 --- /dev/null +++ b/tests/test_sql_refsols/epoch_most_popular_topic_per_region_trino.sql @@ -0,0 +1,28 @@ +WITH _t1 AS ( + SELECT + events.ev_typ, + users.user_region, + COUNT(DISTINCT 
searches.search_id) AS ndistinct_search_id + FROM events AS events + JOIN searches AS searches + ON LOWER(searches.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') + JOIN users AS users + ON searches.search_user_id = users.user_id + GROUP BY + 1, + 2 +), _t AS ( + SELECT + ev_typ, + user_region, + ndistinct_search_id, + ROW_NUMBER() OVER (PARTITION BY user_region ORDER BY ndistinct_search_id DESC NULLS FIRST) AS _w + FROM _t1 +) +SELECT + user_region AS region, + ev_typ AS event_type, + ndistinct_search_id AS n_searches +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_trino.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_trino.sql new file mode 100644 index 000000000..5aae6c921 --- /dev/null +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_trino.sql @@ -0,0 +1,27 @@ +WITH _s0 AS ( + SELECT + ev_dt, + ev_key + FROM events +), _u_0 AS ( + SELECT + _s2.ev_key AS _u_1 + FROM _s0 AS _s2 + JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(_s2.ev_dt AS TIMESTAMP)) + AND eras.er_name = 'Cold War' + AND eras.er_start_year <= YEAR(CAST(_s2.ev_dt AS TIMESTAMP)) + GROUP BY + 1 +) +SELECT + COUNT(DISTINCT _s0.ev_key) AS n_events +FROM _s0 AS _s0 +JOIN times AS times + ON times.t_end_hour > HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) + AND times.t_name = 'Pre-Dawn' + AND times.t_start_hour <= HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) +LEFT JOIN _u_0 AS _u_0 + ON _s0.ev_key = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_trino.sql b/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_trino.sql new file mode 100644 index 000000000..6ebcb2fc9 --- /dev/null +++ b/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_trino.sql @@ -0,0 +1,29 @@ +WITH _s0 AS ( + SELECT + user_id, + user_name + FROM users +), _s1 AS ( + SELECT + search_string, + search_user_id + FROM searches +) +SELECT + 
ARBITRARY(_s0.user_name) AS user_name, + COUNT(DISTINCT _s7.user_id) AS n_other_users +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON _s0.user_id = _s1.search_user_id +JOIN events AS events + ON LOWER(_s1.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') +JOIN _s1 AS _s5 + ON LOWER(_s5.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') +JOIN _s0 AS _s7 + ON _s0.user_name <> _s7.user_name AND _s5.search_user_id = _s7.user_id +GROUP BY + _s0.user_id +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 7 diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_trino.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_trino.sql new file mode 100644 index 000000000..2037c052f --- /dev/null +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_trino.sql @@ -0,0 +1,35 @@ +WITH _s0 AS ( + SELECT + user_id, + user_name + FROM users +), _t2 AS ( + SELECT + _s0.user_id, + ARBITRARY(searches.search_user_id) AS anything_search_user_id, + ARBITRARY(_s0.user_name) AS anything_user_name + FROM _s0 AS _s0 + JOIN searches AS searches + ON _s0.user_id = searches.search_user_id + JOIN events AS events + ON LOWER(searches.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') + JOIN searches AS searches_2 + ON LOWER(searches_2.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') + JOIN _s0 AS _s7 + ON _s0.user_name <> _s7.user_name AND _s7.user_id = searches_2.search_user_id + GROUP BY + searches.search_id, + 1 +) +SELECT + ARBITRARY(anything_user_name) AS user_name, + COUNT(*) AS n_searches +FROM _t2 +WHERE + anything_search_user_id = user_id +GROUP BY + user_id +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 4 diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_trino.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_trino.sql new file mode 100644 index 000000000..5ee1ca094 --- /dev/null +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_trino.sql @@ -0,0 +1,20 @@ +WITH _t0 AS ( + SELECT + 
times.t_name, + ARBITRARY(times.t_start_hour) AS anything_t_start_hour, + COUNT(*) AS n_rows + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 1 +) +SELECT + t_name AS tod, + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches +FROM _t0 +ORDER BY + anything_t_start_hour NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_trino.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_trino.sql new file mode 100644 index 000000000..efd1b8a68 --- /dev/null +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_trino.sql @@ -0,0 +1,22 @@ +WITH _t0 AS ( + SELECT + times.t_name, + ARBITRARY(times.t_start_hour) AS anything_t_start_hour, + AVG(searches.search_num_results) AS avg_search_num_results, + COUNT(*) AS n_rows + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 1 +) +SELECT + t_name AS tod, + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results +FROM _t0 +ORDER BY + anything_t_start_hour NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_summer_events_per_type_trino.sql b/tests/test_sql_refsols/epoch_summer_events_per_type_trino.sql new file mode 100644 index 000000000..b7091de07 --- /dev/null +++ b/tests/test_sql_refsols/epoch_summer_events_per_type_trino.sql @@ -0,0 +1,15 @@ +SELECT + events.ev_typ AS event_type, + COUNT(*) AS n_events +FROM events AS events +JOIN seasons AS seasons + ON ( + seasons.s_month1 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + ) + AND seasons.s_name = 'Summer' +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST 
diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_trino.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_trino.sql new file mode 100644 index 000000000..b9bcb8e5b --- /dev/null +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_trino.sql @@ -0,0 +1,23 @@ +WITH _s0 AS ( + SELECT DISTINCT + search_engine + FROM searches +), _s1 AS ( + SELECT + search_engine, + COUNT(DISTINCT search_user_id) AS ndistinct_search_user_id + FROM searches + WHERE + YEAR(CAST(search_ts AS TIMESTAMP)) <= 2019 + AND YEAR(CAST(search_ts AS TIMESTAMP)) >= 2010 + GROUP BY + 1 +) +SELECT + _s0.search_engine AS engine, + COALESCE(_s1.ndistinct_search_user_id, 0) AS n_users +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.search_engine = _s1.search_engine +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_users_most_cold_war_searches_trino.sql b/tests/test_sql_refsols/epoch_users_most_cold_war_searches_trino.sql new file mode 100644 index 000000000..3d2383c2b --- /dev/null +++ b/tests/test_sql_refsols/epoch_users_most_cold_war_searches_trino.sql @@ -0,0 +1,30 @@ +WITH _t1 AS ( + SELECT + ARBITRARY(searches.search_user_id) AS anything_search_user_id + FROM searches AS searches + JOIN events AS events + ON LOWER(searches.search_string) LIKE CONCAT('%', LOWER(events.ev_name), '%') + JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_name = 'Cold War' + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) + GROUP BY + searches.search_id +), _s5 AS ( + SELECT + anything_search_user_id, + COUNT(*) AS n_rows + FROM _t1 + GROUP BY + 1 +) +SELECT + users.user_name, + _s5.n_rows AS n_cold_war_searches +FROM users AS users +JOIN _s5 AS _s5 + ON _s5.anything_search_user_id = users.user_id +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/extract_colors_trino.sql b/tests/test_sql_refsols/extract_colors_trino.sql new file mode 100644 index 000000000..07a4abd25 --- /dev/null +++ 
b/tests/test_sql_refsols/extract_colors_trino.sql @@ -0,0 +1,12 @@ +SELECT + p_partkey AS key, + UPPER(SPLIT_PART(p_name, ' ', 1)) AS c1, + UPPER(SPLIT_PART(p_name, ' ', 2)) AS c2, + UPPER(SPLIT_PART(p_name, ' ', 3)) AS c3, + UPPER(SPLIT_PART(p_name, ' ', 4)) AS c4, + UPPER(SPLIT_PART(p_name, ' ', 5)) AS c5, + UPPER(SPLIT_PART(p_name, ' ', 6)) AS c6 +FROM tpch.part +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/first_order_in_year_trino.sql b/tests/test_sql_refsols/first_order_in_year_trino.sql new file mode 100644 index 000000000..b82b013c2 --- /dev/null +++ b/tests/test_sql_refsols/first_order_in_year_trino.sql @@ -0,0 +1,19 @@ +WITH _t AS ( + SELECT + o_orderdate, + o_orderkey, + LAG(o_orderdate, 1) OVER (ORDER BY o_orderdate, o_orderkey) AS _w, + LAG(o_orderdate, 1) OVER (ORDER BY o_orderdate, o_orderkey) AS _w_2 + FROM tpch.orders + WHERE + MONTH(CAST(o_orderdate AS TIMESTAMP)) = 1 +) +SELECT + o_orderdate AS order_date, + o_orderkey AS key +FROM _t +WHERE + YEAR(CAST(_w_2 AS TIMESTAMP)) <> YEAR(CAST(o_orderdate AS TIMESTAMP)) + OR _w IS NULL +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/first_order_per_customer_trino.sql b/tests/test_sql_refsols/first_order_per_customer_trino.sql new file mode 100644 index 000000000..1adc4d43e --- /dev/null +++ b/tests/test_sql_refsols/first_order_per_customer_trino.sql @@ -0,0 +1,20 @@ +WITH _t AS ( + SELECT + o_custkey, + o_orderdate, + o_totalprice, + ROW_NUMBER() OVER (PARTITION BY o_custkey ORDER BY o_orderdate, o_orderkey) AS _w + FROM tpch.orders +) +SELECT + customer.c_name AS name, + _t.o_orderdate AS first_order_date, + _t.o_totalprice AS first_order_price +FROM tpch.customer AS customer +JOIN _t AS _t + ON _t._w = 1 AND _t.o_custkey = customer.c_custkey +WHERE + customer.c_acctbal >= 9000.0 +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_trino.sql b/tests/test_sql_refsols/floor_and_ceil_2_trino.sql new file mode 100644 index 
000000000..2d60308c2 --- /dev/null +++ b/tests/test_sql_refsols/floor_and_ceil_2_trino.sql @@ -0,0 +1,9 @@ +SELECT + ps_suppkey AS supplier_key, + ps_partkey AS part_key, + FLOOR(ps_availqty) AS complete_parts, + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost +FROM tpch.partsupp +ORDER BY + 4 DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/floor_and_ceil_trino.sql b/tests/test_sql_refsols/floor_and_ceil_trino.sql new file mode 100644 index 000000000..38b321fcd --- /dev/null +++ b/tests/test_sql_refsols/floor_and_ceil_trino.sql @@ -0,0 +1,11 @@ +SELECT + FLOOR(5.6) AS floor_frac, + CEIL(5.4) AS ceil_frac, + FLOOR(-5.4) AS floor_frac_neg, + CEIL(-5.6) AS ceil_frac_neg, + FLOOR(6) AS floor_int, + CEIL(6) AS ceil_int, + FLOOR(-6) AS floor_int_neg, + CEIL(-6) AS ceil_int_neg +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/function_sampler_trino.sql b/tests/test_sql_refsols/function_sampler_trino.sql new file mode 100644 index 000000000..19d0d6601 --- /dev/null +++ b/tests/test_sql_refsols/function_sampler_trino.sql @@ -0,0 +1,17 @@ +SELECT + CONCAT_WS('-', region.r_name, nation.n_name, SUBSTRING(customer.c_name, 17)) AS a, + ROUND(customer.c_acctbal, 1) AS b, + CASE WHEN SUBSTRING(customer.c_phone, 1, 1) = '3' THEN customer.c_name ELSE NULL END AS c, + NOT CASE WHEN SUBSTRING(customer.c_phone, 2, 1) = '1' THEN customer.c_name ELSE NULL END IS NULL AS d, + CASE WHEN SUBSTRING(customer.c_phone, 15) = '7' THEN customer.c_name ELSE NULL END IS NULL AS e, + ROUND(customer.c_acctbal, 0) AS f +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN tpch.customer AS customer + ON customer.c_acctbal <= 100.0 + AND customer.c_acctbal >= 0.0 + AND customer.c_nationkey = nation.n_nationkey +ORDER BY + customer.c_address NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/get_part_multiple_trino.sql b/tests/test_sql_refsols/get_part_multiple_trino.sql new file mode 100644 index 
000000000..e52d4abc3 --- /dev/null +++ b/tests/test_sql_refsols/get_part_multiple_trino.sql @@ -0,0 +1,25 @@ +SELECT + CAST(SUBSTRING(sbcustid, 2) AS BIGINT) AS _expr0, + SPLIT_PART(sbcustname, ' ', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p1, + SPLIT_PART(sbcustname, ' ', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p2, + SPLIT_PART(sbcustemail, '.', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p3, + SPLIT_PART(sbcustemail, '.', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p4, + SPLIT_PART(sbcustphone, '-', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p5, + SPLIT_PART(sbcustphone, '-', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p6, + SPLIT_PART(sbcustpostalcode, '00', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p7, + SPLIT_PART(sbcustpostalcode, '00', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p8, + SPLIT_PART(sbcustname, '!', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p9, + SPLIT_PART(sbcustname, '@', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p10, + SPLIT_PART(sbcustname, 'aa', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p11, + SPLIT_PART(sbcustname, '#$*', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p12, + SPLIT_PART(sbcustname, '', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p13, + SPLIT_PART('', ' ', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p14, + SPLIT_PART(sbcustname, ' ', 0) AS p15, + SPLIT_PART(sbcuststate, sbcuststate, CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p16, + SPLIT_PART(SPLIT_PART(sbcustphone, '-', 1), '5', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p17, + SPLIT_PART(sbcustpostalcode, '0', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p18 +FROM main.sbcustomer +WHERE + CAST(SUBSTRING(sbcustid, 2) AS BIGINT) <= 4 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/get_part_single_trino.sql b/tests/test_sql_refsols/get_part_single_trino.sql new file mode 100644 index 000000000..a9694cfb1 --- /dev/null +++ b/tests/test_sql_refsols/get_part_single_trino.sql @@ -0,0 +1,5 @@ +SELECT + SPLIT_PART(sbcustname, ' ', -1) AS last_name 
+FROM main.sbcustomer +WHERE + sbcustname = 'Alex Rodriguez' diff --git a/tests/test_sql_refsols/global_acctbal_breakdown_trino.sql b/tests/test_sql_refsols/global_acctbal_breakdown_trino.sql new file mode 100644 index 000000000..a3506e8ee --- /dev/null +++ b/tests/test_sql_refsols/global_acctbal_breakdown_trino.sql @@ -0,0 +1,51 @@ +WITH _t0 AS ( + SELECT + c_acctbal, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END + ELSE NULL + END AS expr_5, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC) - 1.0 + ) - ( + CAST(( + COUNT(c_acctbal) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN c_acctbal + ELSE NULL + END AS expr_6, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END + ELSE NULL + END AS expr_7 + FROM tpch.customer +) +SELECT + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + AVG(expr_7) AS median_red_acctbal, + AVG(expr_5) AS median_black_acctbal, + AVG(expr_6) AS median_overall_acctbal +FROM _t0 diff --git a/tests/test_sql_refsols/has_cross_correlated_singular_trino.sql b/tests/test_sql_refsols/has_cross_correlated_singular_trino.sql new file mode 100644 index 000000000..afc70b571 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_trino.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + n_nationkey AS _u_1 + FROM tpch.nation + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer 
+LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_nationkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/has_cross_correlated_trino.sql b/tests/test_sql_refsols/has_cross_correlated_trino.sql new file mode 100644 index 000000000..1f66f7742 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT customer.c_custkey) AS n +FROM tpch.customer AS customer +JOIN tpch.supplier AS supplier + ON customer.c_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/has_equals_one_trino.sql b/tests/test_sql_refsols/has_equals_one_trino.sql new file mode 100644 index 000000000..756bdfe74 --- /dev/null +++ b/tests/test_sql_refsols/has_equals_one_trino.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT o_custkey) AS n +FROM tpch.orders +WHERE + o_orderpriority = '1-URGENT' diff --git a/tests/test_sql_refsols/hasnot_equals_one_trino.sql b/tests/test_sql_refsols/hasnot_equals_one_trino.sql new file mode 100644 index 000000000..f44d0fc02 --- /dev/null +++ b/tests/test_sql_refsols/hasnot_equals_one_trino.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + WHERE + o_orderpriority = '1-URGENT' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +WHERE + _s1.n_rows = 0 OR _s1.n_rows IS NULL diff --git a/tests/test_sql_refsols/highest_priority_per_year_trino.sql b/tests/test_sql_refsols/highest_priority_per_year_trino.sql new file mode 100644 index 000000000..8d68322a0 --- /dev/null +++ b/tests/test_sql_refsols/highest_priority_per_year_trino.sql @@ -0,0 +1,34 @@ +WITH _t3 AS ( + SELECT + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + o_orderpriority, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1, + 2 +), _t2 AS ( + SELECT + o_orderpriority, + year_o_orderdate, + ( + 100.0 * n_rows + ) / SUM(n_rows) OVER (PARTITION BY year_o_orderdate) AS 
priority_pct + FROM _t3 +), _t AS ( + SELECT + o_orderpriority, + year_o_orderdate, + priority_pct, + ROW_NUMBER() OVER (PARTITION BY year_o_orderdate ORDER BY priority_pct DESC NULLS FIRST) AS _w + FROM _t2 +) +SELECT + year_o_orderdate AS order_year, + o_orderpriority AS highest_priority, + priority_pct +FROM _t +WHERE + _w = 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/hour_minute_day_trino.sql b/tests/test_sql_refsols/hour_minute_day_trino.sql new file mode 100644 index 000000000..350d60a86 --- /dev/null +++ b/tests/test_sql_refsols/hour_minute_day_trino.sql @@ -0,0 +1,13 @@ +SELECT + sbtransaction.sbtxid AS transaction_id, + HOUR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr0, + MINUTE(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr1, + SECOND(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr2 +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickersymbol IN ('AAPL', 'GOOGL', 'NFLX') +WHERE + YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/keywords_alias_reserved_word_trino.sql b/tests/test_sql_refsols/keywords_alias_reserved_word_trino.sql new file mode 100644 index 000000000..c7b09e3f5 --- /dev/null +++ b/tests/test_sql_refsols/keywords_alias_reserved_word_trino.sql @@ -0,0 +1,12 @@ +SELECT + COALESCE("where".default_to, "where".".calculate") AS calculate2, + calculate.".where" AS _where, + calculate."like" AS _like, + calculate.datetime, + "where".abs, + "where".has +FROM keywords."where" AS "where" +JOIN keywords.calculate AS calculate + ON "where".".calculate" = calculate.".where" +WHERE + "where".".calculate" = 4 AND "where".present IS NULL diff --git a/tests/test_sql_refsols/keywords_cast_alias_and_missing_alias_trino.sql b/tests/test_sql_refsols/keywords_cast_alias_and_missing_alias_trino.sql new file mode 100644 index 000000000..0837c18bf --- 
/dev/null +++ b/tests/test_sql_refsols/keywords_cast_alias_and_missing_alias_trino.sql @@ -0,0 +1,12 @@ +SELECT + "cast".id2 AS id1, + "cast".id AS id2, + "lowercase_detail"."select" AS fk1_select, + "lowercase_detail"."as" AS fk1_as, + lowercase_detail_2."two words" AS fk2_two_words +FROM keywords."cast" AS "cast" +JOIN keywords."lowercase_detail" AS "lowercase_detail" + ON "cast".id2 = "lowercase_detail".id + AND "lowercase_detail"."0 = 0 and '" = '2 "0 = 0 and ''" field name' +JOIN keywords."lowercase_detail" AS lowercase_detail_2 + ON "cast".id = lowercase_detail_2.id AND lowercase_detail_2.id = 1 diff --git a/tests/test_sql_refsols/keywords_column_alias_reserved_trino.sql b/tests/test_sql_refsols/keywords_column_alias_reserved_trino.sql new file mode 100644 index 000000000..4fc43615b --- /dev/null +++ b/tests/test_sql_refsols/keywords_column_alias_reserved_trino.sql @@ -0,0 +1,14 @@ +SELECT + "mixedcase_1:1"."id" AS id_, + "mixedcase_1:1"."lowercaseid" AS LowerCaseID, + "uppercase_master"."integer", + "lowercase_detail"."as" AS as_, + "uppercase_master"."order by" AS order_ +FROM keywords."mixedcase_1:1" AS "mixedcase_1:1" +JOIN keywords."lowercase_detail" AS "lowercase_detail" + ON "lowercase_detail"."as" = '10 as reserved word' + AND "lowercase_detail".id = "mixedcase_1:1"."lowercaseid" +JOIN keywords."uppercase_master" AS "uppercase_master" + ON "mixedcase_1:1"."id" = "uppercase_master".id +WHERE + "mixedcase_1:1"."(parentheses)" = '5 (parentheses)' diff --git a/tests/test_sql_refsols/keywords_expr_call_quoted_names_trino.sql b/tests/test_sql_refsols/keywords_expr_call_quoted_names_trino.sql new file mode 100644 index 000000000..cf0e22681 --- /dev/null +++ b/tests/test_sql_refsols/keywords_expr_call_quoted_names_trino.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT + MAX("where") AS max_where + FROM keywords."partition" +), _s1 AS ( + SELECT + AVG("= ""quote""") AS avg_quote, + COUNT("`cast`") AS count_cast, + MAX("`name""[") AS max_name, + MIN("= ""quote""") 
AS min_quote, + SUM("`name""[") AS sum_name + FROM keywords."""quoted table_name""" +) +SELECT + _s0.max_where, + _s1.min_quote, + _s1.max_name, + _s1.count_cast, + _s1.avg_quote AS quote_avg, + COALESCE(_s1.sum_name, 0) AS sum_name +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/keywords_locals_globals_eval_trino.sql b/tests/test_sql_refsols/keywords_locals_globals_eval_trino.sql new file mode 100644 index 000000000..db4124e69 --- /dev/null +++ b/tests/test_sql_refsols/keywords_locals_globals_eval_trino.sql @@ -0,0 +1,9 @@ +SELECT + "count".node, + "cast".types, + 2.0 AS expr +FROM keywords."count" AS "count" +JOIN keywords."cast" AS "cast" + ON "cast".pk_field_name = "count".this +WHERE + "count".node = 4071 diff --git a/tests/test_sql_refsols/keywords_python_sql_reserved_trino.sql b/tests/test_sql_refsols/keywords_python_sql_reserved_trino.sql new file mode 100644 index 000000000..68610a67c --- /dev/null +++ b/tests/test_sql_refsols/keywords_python_sql_reserved_trino.sql @@ -0,0 +1,12 @@ +SELECT + """," AS dbl_quote_dot, + "." AS dot, + "." 
+ COALESCE("float", str, 1) AS addition, + "__col__" AS col, + "__col1__" AS col1, + def AS def_, + del AS __del__, + "__init__" +FROM keywords."count" +WHERE + "int" = 8051 diff --git a/tests/test_sql_refsols/keywords_quoted_table_name_trino.sql b/tests/test_sql_refsols/keywords_quoted_table_name_trino.sql new file mode 100644 index 000000000..e130322a3 --- /dev/null +++ b/tests/test_sql_refsols/keywords_quoted_table_name_trino.sql @@ -0,0 +1,12 @@ +SELECT + """quoted table_name"""."`cast`" AS cast_, + """quoted table_name"""."`name""[" AS name, + """quoted table_name"""."= ""quote""" AS quote_, + "lowercase_detail"."0 = 0 and '" AS _0_0_and, + "lowercase_detail"."as" AS as_ +FROM keywords."""quoted table_name""" AS """quoted table_name""" +JOIN keywords."lowercase_detail" AS "lowercase_detail" + ON """quoted table_name"""."`name""[" = "lowercase_detail".id +WHERE + """quoted table_name"""."= ""quote""" = 4 + AND """quoted table_name"""."`name""[" = 7 diff --git a/tests/test_sql_refsols/keywords_single_quote_use_trino.sql b/tests/test_sql_refsols/keywords_single_quote_use_trino.sql new file mode 100644 index 000000000..99062419a --- /dev/null +++ b/tests/test_sql_refsols/keywords_single_quote_use_trino.sql @@ -0,0 +1,5 @@ +SELECT + description +FROM keywords.master +WHERE + description <> 'One-One ''master row' AND id1 = 1 AND id2 = 1 diff --git a/tests/test_sql_refsols/many_net_filter_10_trino.sql b/tests/test_sql_refsols/many_net_filter_10_trino.sql new file mode 100644 index 000000000..bc972f0c9 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_10_trino.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey AND nation_2.n_regionkey = 2 +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey diff --git 
a/tests/test_sql_refsols/many_net_filter_11_trino.sql b/tests/test_sql_refsols/many_net_filter_11_trino.sql new file mode 100644 index 000000000..5d896027f --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_11_trino.sql @@ -0,0 +1,13 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey AND nation.n_regionkey < 3 +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey AND nation_2.n_regionkey > 0 +JOIN tpch.customer AS customer + ON NOT customer.c_nationkey IN (1, 4, 7, 10, 13, 16, 19, 22) + AND customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey +WHERE + NOT supplier.s_nationkey IN (0, 3, 6, 9, 12, 15, 18, 21, 24) diff --git a/tests/test_sql_refsols/many_net_filter_1_trino.sql b/tests/test_sql_refsols/many_net_filter_1_trino.sql new file mode 100644 index 000000000..ca2507af2 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_1_trino.sql @@ -0,0 +1,8 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = supplier.s_nationkey +WHERE + supplier.s_nationkey = 1 diff --git a/tests/test_sql_refsols/many_net_filter_2_trino.sql b/tests/test_sql_refsols/many_net_filter_2_trino.sql new file mode 100644 index 000000000..cc3739106 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_2_trino.sql @@ -0,0 +1,8 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = supplier.s_nationkey +WHERE + supplier.s_nationkey = 2 diff --git a/tests/test_sql_refsols/many_net_filter_3_trino.sql b/tests/test_sql_refsols/many_net_filter_3_trino.sql new file mode 100644 index 000000000..91b4854e6 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_3_trino.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM 
tpch.supplier AS supplier +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = 3 + AND customer.c_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/many_net_filter_4_trino.sql b/tests/test_sql_refsols/many_net_filter_4_trino.sql new file mode 100644 index 000000000..cc3d7ff4f --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_4_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN _s1 AS _s1 + ON _s1.n_nationkey = supplier.s_nationkey +JOIN _s1 AS _s3 + ON _s1.n_regionkey = _s3.n_regionkey +JOIN tpch.customer AS customer + ON _s3.n_nationkey = customer.c_nationkey + AND customer.c_custkey = supplier.s_suppkey +WHERE + supplier.s_nationkey = 4 diff --git a/tests/test_sql_refsols/many_net_filter_5_trino.sql b/tests/test_sql_refsols/many_net_filter_5_trino.sql new file mode 100644 index 000000000..229fa9262 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_5_trino.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = 5 AND nation.n_nationkey = supplier.s_nationkey +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey diff --git a/tests/test_sql_refsols/many_net_filter_6_trino.sql b/tests/test_sql_refsols/many_net_filter_6_trino.sql new file mode 100644 index 000000000..f01aebdd4 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_6_trino.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey AND nation_2.n_nationkey = 6 +JOIN tpch.customer AS customer + ON 
customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey diff --git a/tests/test_sql_refsols/many_net_filter_7_trino.sql b/tests/test_sql_refsols/many_net_filter_7_trino.sql new file mode 100644 index 000000000..ca33be2de --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_7_trino.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN _s1 AS _s1 + ON _s1.n_nationkey = supplier.s_nationkey +JOIN _s1 AS _s3 + ON _s1.n_regionkey = _s3.n_regionkey +JOIN tpch.customer AS customer + ON _s3.n_nationkey = customer.c_nationkey + AND customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = 7 diff --git a/tests/test_sql_refsols/many_net_filter_8_trino.sql b/tests/test_sql_refsols/many_net_filter_8_trino.sql new file mode 100644 index 000000000..e037426ff --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_8_trino.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey AND nation.n_regionkey = 0 +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey diff --git a/tests/test_sql_refsols/many_net_filter_9_trino.sql b/tests/test_sql_refsols/many_net_filter_9_trino.sql new file mode 100644 index 000000000..af22a24a1 --- /dev/null +++ b/tests/test_sql_refsols/many_net_filter_9_trino.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey AND nation.n_regionkey = 1 +JOIN tpch.nation AS nation_2 + ON nation.n_regionkey = nation_2.n_regionkey +JOIN tpch.customer AS customer + ON customer.c_custkey = supplier.s_suppkey + AND customer.c_nationkey = nation_2.n_nationkey diff --git 
a/tests/test_sql_refsols/month_year_sliding_windows_trino.sql b/tests/test_sql_refsols/month_year_sliding_windows_trino.sql new file mode 100644 index 000000000..c6635d0ab --- /dev/null +++ b/tests/test_sql_refsols/month_year_sliding_windows_trino.sql @@ -0,0 +1,52 @@ +WITH _t7 AS ( + SELECT + o_orderdate, + o_orderpriority, + o_totalprice + FROM tpch.orders + WHERE + o_orderpriority = '1-URGENT' +), _t5 AS ( + SELECT + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + SUM(o_totalprice) AS sum_o_totalprice + FROM _t7 + GROUP BY + 1 +), _t4 AS ( + SELECT + sum_o_totalprice, + year_o_orderdate, + LEAD(COALESCE(sum_o_totalprice, 0), 1, 0.0) OVER (ORDER BY year_o_orderdate) AS next_year_total_spent + FROM _t5 +), _t2 AS ( + SELECT + MONTH(CAST(_t8.o_orderdate AS TIMESTAMP)) AS month_o_orderdate, + YEAR(CAST(_t8.o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + SUM(_t8.o_totalprice) AS sum_o_totalprice + FROM _t4 AS _t4 + JOIN _t7 AS _t8 + ON _t4.year_o_orderdate = YEAR(CAST(_t8.o_orderdate AS TIMESTAMP)) + WHERE + _t4.next_year_total_spent < COALESCE(_t4.sum_o_totalprice, 0) + GROUP BY + 1, + 2 +), _t AS ( + SELECT + month_o_orderdate, + year_o_orderdate, + sum_o_totalprice, + LEAD(COALESCE(sum_o_totalprice, 0), 1, 0.0) OVER (ORDER BY year_o_orderdate, month_o_orderdate) AS _w, + LAG(COALESCE(sum_o_totalprice, 0), 1, 0.0) OVER (ORDER BY year_o_orderdate, month_o_orderdate) AS _w_2 + FROM _t2 +) +SELECT + year_o_orderdate AS year, + month_o_orderdate AS month +FROM _t +WHERE + _w < COALESCE(sum_o_totalprice, 0) AND _w_2 < COALESCE(sum_o_totalprice, 0) +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/n_orders_first_day_trino.sql b/tests/test_sql_refsols/n_orders_first_day_trino.sql new file mode 100644 index 000000000..4fbb161fc --- /dev/null +++ b/tests/test_sql_refsols/n_orders_first_day_trino.sql @@ -0,0 +1,10 @@ +WITH _t AS ( + SELECT + RANK() OVER (ORDER BY o_orderdate) AS _w + FROM tpch.orders +) +SELECT + COUNT(*) AS 
n_orders +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_trino.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_trino.sql new file mode 100644 index 000000000..c56fb4cfe --- /dev/null +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_trino.sql @@ -0,0 +1,62 @@ +WITH _t1 AS ( + SELECT + customer.c_acctbal, + customer.c_nationkey, + nation.n_name, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) OVER (PARTITION BY customer.c_nationkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END + ELSE NULL + END AS expr_5, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC) - 1.0 + ) - ( + CAST(( + COUNT(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN customer.c_acctbal + ELSE NULL + END AS expr_6, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) OVER (PARTITION BY customer.c_nationkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END + ELSE NULL + END AS expr_7 + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +) +SELECT + ARBITRARY(n_name) AS nation_name, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal 
ELSE NULL END) AS n_black_acctbal, + AVG(expr_7) AS median_red_acctbal, + AVG(expr_5) AS median_black_acctbal, + AVG(expr_6) AS median_overall_acctbal +FROM _t1 +GROUP BY + c_nationkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/nation_best_order_trino.sql b/tests/test_sql_refsols/nation_best_order_trino.sql new file mode 100644 index 000000000..b2b04d253 --- /dev/null +++ b/tests/test_sql_refsols/nation_best_order_trino.sql @@ -0,0 +1,36 @@ +WITH _t3 AS ( + SELECT + customer.c_name, + customer.c_nationkey, + orders.o_orderkey, + orders.o_totalprice, + CAST(( + 100.0 * orders.o_totalprice + ) AS DOUBLE) / SUM(orders.o_totalprice) OVER (PARTITION BY customer.c_nationkey) AS value_percentage + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey +), _t AS ( + SELECT + c_name, + c_nationkey, + o_orderkey, + o_totalprice, + value_percentage, + ROW_NUMBER() OVER (PARTITION BY c_nationkey ORDER BY o_totalprice DESC NULLS FIRST) AS _w + FROM _t3 +) +SELECT + nation.n_name AS nation_name, + _t.c_name AS customer_name, + _t.o_orderkey AS order_key, + _t.o_totalprice AS order_value, + _t.value_percentage +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN _t AS _t + ON _t._w = 1 AND _t.c_nationkey = nation.n_nationkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/nation_window_aggs_trino.sql b/tests/test_sql_refsols/nation_window_aggs_trino.sql new file mode 100644 index 000000000..74303b1b4 --- /dev/null +++ b/tests/test_sql_refsols/nation_window_aggs_trino.sql @@ -0,0 +1,12 @@ +SELECT + n_name AS nation_name, + SUM(n_nationkey) OVER () AS key_sum, + AVG(CAST(n_nationkey AS DOUBLE)) OVER () AS key_avg, + COUNT(CASE WHEN LENGTH(n_comment) < 75 THEN n_comment ELSE NULL END) OVER () AS n_short_comment, + COUNT(*) OVER () AS n_nations +FROM tpch.nation +WHERE 
+ NOT SUBSTRING(n_name, 1, 1) IN ('A', 'E', 'I', 'O', 'U') +ORDER BY + n_regionkey NULLS FIRST, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/numerical_functions_trino.sql b/tests/test_sql_refsols/numerical_functions_trino.sql new file mode 100644 index 000000000..d02e9691a --- /dev/null +++ b/tests/test_sql_refsols/numerical_functions_trino.sql @@ -0,0 +1,11 @@ +SELECT + ABS(c_acctbal) AS abs_value, + ROUND(c_acctbal, 2) AS round_value, + CEIL(c_acctbal) AS ceil_value, + FLOOR(c_acctbal) AS floor_value, + POWER(c_acctbal, 2) AS power_value, + POWER(c_acctbal, 0.5) AS sqrt_value, + SIGN(c_acctbal) AS sign_value, + LEAST(c_acctbal, 0) AS smallest_value, + GREATEST(c_acctbal, 0) AS largest_value +FROM tpch.customer diff --git a/tests/test_sql_refsols/odate_and_rdate_avggap_trino.sql b/tests/test_sql_refsols/odate_and_rdate_avggap_trino.sql new file mode 100644 index 000000000..92b05d1fc --- /dev/null +++ b/tests/test_sql_refsols/odate_and_rdate_avggap_trino.sql @@ -0,0 +1,13 @@ +SELECT + AVG( + DATE_DIFF( + 'DAY', + CAST(orders.o_orderdate AS TIMESTAMP), + CAST(LEAST(lineitem.l_commitdate, lineitem.l_receiptdate) AS TIMESTAMP) + ) + ) AS avg_gap +FROM tpch.lineitem AS lineitem +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +WHERE + lineitem.l_shipmode = 'RAIL' diff --git a/tests/test_sql_refsols/order_info_per_priority_trino.sql b/tests/test_sql_refsols/order_info_per_priority_trino.sql new file mode 100644 index 000000000..ce4ed620d --- /dev/null +++ b/tests/test_sql_refsols/order_info_per_priority_trino.sql @@ -0,0 +1,19 @@ +WITH _t AS ( + SELECT + o_orderkey, + o_orderpriority, + o_totalprice, + ROW_NUMBER() OVER (PARTITION BY o_orderpriority ORDER BY o_totalprice DESC NULLS FIRST) AS _w + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1992 +) +SELECT + o_orderpriority AS order_priority, + o_orderkey AS order_key, + o_totalprice AS order_total_price +FROM _t +WHERE + _w = 1 +ORDER BY + 1 NULLS FIRST diff --git 
a/tests/test_sql_refsols/order_quarter_test_trino.sql b/tests/test_sql_refsols/order_quarter_test_trino.sql new file mode 100644 index 000000000..546cc147b --- /dev/null +++ b/tests/test_sql_refsols/order_quarter_test_trino.sql @@ -0,0 +1,18 @@ +SELECT + o_orderdate AS order_date, + QUARTER(CAST(o_orderdate AS TIMESTAMP)) AS quarter, + DATE_TRUNC('QUARTER', CAST(o_orderdate AS TIMESTAMP)) AS quarter_start, + DATE_ADD('QUARTER', 1, CAST(o_orderdate AS TIMESTAMP)) AS next_quarter, + DATE_ADD('QUARTER', -1, CAST(o_orderdate AS TIMESTAMP)) AS prev_quarter, + DATE_ADD('QUARTER', 2, CAST(o_orderdate AS TIMESTAMP)) AS two_quarters_ahead, + DATE_ADD('QUARTER', -2, CAST(o_orderdate AS TIMESTAMP)) AS two_quarters_behind, + DATE_DIFF('QUARTER', CAST('1995-01-01' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS quarters_since_1995, + DATE_DIFF('QUARTER', CAST(o_orderdate AS TIMESTAMP), CAST('2000-01-01' AS TIMESTAMP)) AS quarters_until_2000, + DATE_ADD('QUARTER', -4, CAST(o_orderdate AS TIMESTAMP)) AS same_quarter_prev_year, + DATE_ADD('QUARTER', 4, CAST(o_orderdate AS TIMESTAMP)) AS same_quarter_next_year +FROM tpch.orders +WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1995 +ORDER BY + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/orders_versus_first_orders_trino.sql b/tests/test_sql_refsols/orders_versus_first_orders_trino.sql new file mode 100644 index 000000000..a0dcf3301 --- /dev/null +++ b/tests/test_sql_refsols/orders_versus_first_orders_trino.sql @@ -0,0 +1,37 @@ +WITH _s4 AS ( + SELECT + o_custkey, + o_orderdate, + o_orderkey + FROM tpch.orders +), _t AS ( + SELECT + customer.c_custkey, + customer.c_name, + _s3.o_orderdate, + ROW_NUMBER() OVER (PARTITION BY _s3.o_custkey ORDER BY _s3.o_orderdate, _s3.o_orderkey) AS _w + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'VIETNAM' + JOIN _s4 AS _s3 + ON _s3.o_custkey = customer.c_custkey +), _s5 AS ( + SELECT + c_custkey, + 
c_name, + o_orderdate + FROM _t + WHERE + _w = 1 +) +SELECT + _s5.c_name AS customer_name, + _s4.o_orderkey AS order_key, + DATE_DIFF('DAY', CAST(_s5.o_orderdate AS TIMESTAMP), CAST(_s4.o_orderdate AS TIMESTAMP)) AS days_since_first_order +FROM _s4 AS _s4 +LEFT JOIN _s5 AS _s5 + ON _s4.o_custkey = _s5.c_custkey +ORDER BY + 3 DESC, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/part_cross_part_a_trino.sql b/tests/test_sql_refsols/part_cross_part_a_trino.sql new file mode 100644 index 000000000..a29b0a0f9 --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_a_trino.sql @@ -0,0 +1,35 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbtickerexchange + FROM main.sbticker +), _s9 AS ( + SELECT + sbcustomer.sbcustid, + _s2.sbtickerexchange, + COUNT(*) AS n_rows + FROM _s0 AS _s2 + CROSS JOIN main.sbcustomer AS sbcustomer + JOIN main.sbtransaction AS sbtransaction + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + JOIN main.sbticker AS sbticker + ON _s2.sbtickerexchange = sbticker.sbtickerexchange + AND sbticker.sbtickerid = sbtransaction.sbtxtickerid + GROUP BY + 1, + 2 +) +SELECT + sbcustomer.sbcuststate AS state, + _s0.sbtickerexchange AS exchange, + COALESCE(SUM(_s9.n_rows), 0) AS n +FROM _s0 AS _s0 +CROSS JOIN main.sbcustomer AS sbcustomer +LEFT JOIN _s9 AS _s9 + ON _s0.sbtickerexchange = _s9.sbtickerexchange + AND _s9.sbcustid = sbcustomer.sbcustid +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/part_cross_part_b_trino.sql b/tests/test_sql_refsols/part_cross_part_b_trino.sql new file mode 100644 index 000000000..336acb273 --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_b_trino.sql @@ -0,0 +1,46 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbcuststate + FROM main.sbcustomer +), _t2 AS ( + SELECT + sbtxdatetime + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s3 
AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s9 AS ( + SELECT + _s3.month, + _s2.sbcuststate, + COUNT(*) AS n_rows + FROM _s0 AS _s2 + CROSS JOIN _s3 AS _s3 + JOIN main.sbtransaction AS sbtransaction + ON YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND _s3.month = DATE_TRUNC('MONTH', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + JOIN main.sbcustomer AS sbcustomer + ON _s2.sbcuststate = sbcustomer.sbcuststate + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid + GROUP BY + 1, + 2 +) +SELECT + _s0.sbcuststate AS state, + _s1.month AS month_of_year, + SUM(COALESCE(_s9.n_rows, 0)) OVER (PARTITION BY _s0.sbcuststate ORDER BY _s1.month ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.sbcuststate = _s9.sbcuststate AND _s1.month = _s9.month +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/part_cross_part_c_trino.sql b/tests/test_sql_refsols/part_cross_part_c_trino.sql new file mode 100644 index 000000000..e6e8d9bca --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_c_trino.sql @@ -0,0 +1,44 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbcuststate + FROM main.sbcustomer +), _t2 AS ( + SELECT + sbtxdatetime + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s3 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s9 AS ( + SELECT + _s3.month, + _s2.sbcuststate, + COUNT(*) AS n_rows + FROM _s0 AS _s2 + CROSS JOIN _s3 AS _s3 + JOIN main.sbtransaction AS sbtransaction + ON YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND _s3.month = DATE_TRUNC('MONTH', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + JOIN main.sbcustomer AS sbcustomer + ON _s2.sbcuststate = sbcustomer.sbcuststate + AND 
sbcustomer.sbcustid = sbtransaction.sbtxcustid + GROUP BY + 1, + 2 +) +SELECT + _s0.sbcuststate AS state, + MAX(COALESCE(_s9.n_rows, 0)) AS max_n +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.sbcuststate = _s9.sbcuststate AND _s1.month = _s9.month +GROUP BY + 1 diff --git a/tests/test_sql_refsols/part_reduced_size_trino.sql b/tests/test_sql_refsols/part_reduced_size_trino.sql new file mode 100644 index 000000000..8fc534836 --- /dev/null +++ b/tests/test_sql_refsols/part_reduced_size_trino.sql @@ -0,0 +1,25 @@ +WITH _s0 AS ( + SELECT + p_partkey, + p_retailprice, + p_size + FROM tpch.part + ORDER BY + CAST(p_retailprice AS BIGINT) NULLS FIRST + LIMIT 2 +) +SELECT + CAST(_s0.p_size AS DOUBLE) / 2.5 AS reduced_size, + CAST(_s0.p_retailprice AS BIGINT) AS retail_price_int, + CONCAT_WS('', 'old size: ', CAST(_s0.p_size AS VARCHAR)) AS message, + lineitem.l_discount AS discount, + DATE_FORMAT(lineitem.l_receiptdate, '%d-%m-%Y') AS date_dmy, + DATE_FORMAT(lineitem.l_receiptdate, '%m/%d') AS date_md, + DATE_FORMAT(lineitem.l_receiptdate, '%H:%i%p') AS am_pm +FROM _s0 AS _s0 +JOIN tpch.lineitem AS lineitem + ON _s0.p_partkey = lineitem.l_partkey +ORDER BY + 4 DESC, + 5 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/parts_quantity_increase_95_96_trino.sql b/tests/test_sql_refsols/parts_quantity_increase_95_96_trino.sql new file mode 100644 index 000000000..84cc63d69 --- /dev/null +++ b/tests/test_sql_refsols/parts_quantity_increase_95_96_trino.sql @@ -0,0 +1,41 @@ +WITH _t4 AS ( + SELECT + l_orderkey, + l_partkey, + l_quantity, + l_shipmode + FROM tpch.lineitem + WHERE + l_shipmode = 'RAIL' +), _s6 AS ( + SELECT + _t4.l_partkey, + ARBITRARY(part.p_name) AS anything_p_name, + SUM(_t4.l_quantity) AS sum_l_quantity + FROM tpch.part AS part + JOIN _t4 AS _t4 + ON _t4.l_partkey = part.p_partkey + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1995 + AND _t4.l_orderkey = orders.o_orderkey + WHERE + 
STARTS_WITH(part.p_container, 'SM') + GROUP BY + 1 +) +SELECT + ARBITRARY(_s6.anything_p_name) AS name, + COALESCE(ARBITRARY(_s6.sum_l_quantity), 0) AS qty_95, + COALESCE(SUM(_t6.l_quantity), 0) AS qty_96 +FROM _s6 AS _s6 +JOIN _t4 AS _t6 + ON _s6.l_partkey = _t6.l_partkey +JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1996 + AND _t6.l_orderkey = orders.o_orderkey +GROUP BY + _t6.l_partkey +ORDER BY + COALESCE(SUM(_t6.l_quantity), 0) - COALESCE(ARBITRARY(_s6.sum_l_quantity), 0) DESC, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/percentile_customers_per_region_trino.sql b/tests/test_sql_refsols/percentile_customers_per_region_trino.sql new file mode 100644 index 000000000..d44306242 --- /dev/null +++ b/tests/test_sql_refsols/percentile_customers_per_region_trino.sql @@ -0,0 +1,16 @@ +WITH _t AS ( + SELECT + customer.c_name, + customer.c_phone, + NTILE(100) OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal) AS _w + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +) +SELECT + c_name AS name +FROM _t +WHERE + _w = 95 AND c_phone LIKE '%00' +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/percentile_nations_trino.sql b/tests/test_sql_refsols/percentile_nations_trino.sql new file mode 100644 index 000000000..14068348c --- /dev/null +++ b/tests/test_sql_refsols/percentile_nations_trino.sql @@ -0,0 +1,5 @@ +SELECT + n_name AS name, + NTILE(5) OVER (ORDER BY n_name) AS p1, + NTILE(5) OVER (ORDER BY n_name) AS p2 +FROM tpch.nation diff --git a/tests/test_sql_refsols/prev_next_regions_trino.sql b/tests/test_sql_refsols/prev_next_regions_trino.sql new file mode 100644 index 000000000..c4f9bd633 --- /dev/null +++ b/tests/test_sql_refsols/prev_next_regions_trino.sql @@ -0,0 +1,9 @@ +SELECT + LAG(r_name, 2) OVER (ORDER BY r_name) AS two_preceding, + LAG(r_name, 1) OVER (ORDER BY r_name) AS one_preceding, + r_name AS current_region, + LEAD(r_name, 
1) OVER (ORDER BY r_name) AS one_following, + LEAD(r_name, 2) OVER (ORDER BY r_name) AS two_following +FROM tpch.region +ORDER BY + 3 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_function_test_1_trino.sql b/tests/test_sql_refsols/quantile_function_test_1_trino.sql new file mode 100644 index 000000000..3defe357f --- /dev/null +++ b/tests/test_sql_refsols/quantile_function_test_1_trino.sql @@ -0,0 +1,14 @@ +WITH _t0 AS ( + SELECT + CASE + WHEN FLOOR(0.3 * COUNT(o_totalprice) OVER ()) < ROW_NUMBER() OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice + ELSE NULL + END AS expr_1 + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1998 +) +SELECT + MAX(expr_1) AS seventieth_order_price +FROM _t0 diff --git a/tests/test_sql_refsols/quantile_function_test_2_trino.sql b/tests/test_sql_refsols/quantile_function_test_2_trino.sql new file mode 100644 index 000000000..30fb6b89e --- /dev/null +++ b/tests/test_sql_refsols/quantile_function_test_2_trino.sql @@ -0,0 +1,81 @@ +WITH _s0 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation + ORDER BY + 1 NULLS FIRST + LIMIT 5 +), _s5 AS ( + SELECT + customer.c_nationkey, + orders.o_totalprice + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey +), _t1 AS ( + SELECT + _s0.n_name, + _s0.n_nationkey, + _s5.o_totalprice, + region.r_name, + CASE + WHEN FLOOR(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_10, + CASE + WHEN FLOOR(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_11, + CASE + WHEN FLOOR(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() 
OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_12, + CASE + WHEN FLOOR(0.1 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_13, + CASE + WHEN FLOOR(0.01 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_14, + CASE + WHEN FLOOR(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_15, + CASE + WHEN FLOOR(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_9 + FROM _s0 AS _s0 + JOIN tpch.region AS region + ON _s0.n_regionkey = region.r_regionkey + LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey +) +SELECT + ARBITRARY(r_name) AS region_name, + ARBITRARY(n_name) AS nation_name, + MIN(o_totalprice) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_15) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + n_nationkey +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_function_test_3_trino.sql b/tests/test_sql_refsols/quantile_function_test_3_trino.sql new file mode 100644 index 000000000..30fb6b89e --- /dev/null +++ b/tests/test_sql_refsols/quantile_function_test_3_trino.sql @@ -0,0 +1,81 @@ +WITH _s0 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation + ORDER BY + 1 
NULLS FIRST + LIMIT 5 +), _s5 AS ( + SELECT + customer.c_nationkey, + orders.o_totalprice + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey +), _t1 AS ( + SELECT + _s0.n_name, + _s0.n_nationkey, + _s5.o_totalprice, + region.r_name, + CASE + WHEN FLOOR(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_10, + CASE + WHEN FLOOR(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_11, + CASE + WHEN FLOOR(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_12, + CASE + WHEN FLOOR(0.1 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_13, + CASE + WHEN FLOOR(0.01 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_14, + CASE + WHEN FLOOR(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_15, + CASE + WHEN FLOOR(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_9 + FROM _s0 AS _s0 + JOIN tpch.region AS region + ON _s0.n_regionkey = region.r_regionkey + 
LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey +) +SELECT + ARBITRARY(r_name) AS region_name, + ARBITRARY(n_name) AS nation_name, + MIN(o_totalprice) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_15) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + n_nationkey +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_function_test_4_trino.sql b/tests/test_sql_refsols/quantile_function_test_4_trino.sql new file mode 100644 index 000000000..150fe05a5 --- /dev/null +++ b/tests/test_sql_refsols/quantile_function_test_4_trino.sql @@ -0,0 +1,80 @@ +WITH _s0 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation + ORDER BY + 1 NULLS FIRST + LIMIT 5 +), _s5 AS ( + SELECT + customer.c_nationkey, + orders.o_totalprice + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_clerk = 'Clerk#000000272' +), _t1 AS ( + SELECT + _s0.n_name, + _s0.n_nationkey, + _s5.o_totalprice, + region.r_name, + CASE + WHEN FLOOR(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_10, + CASE + WHEN FLOOR(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_11, + CASE + WHEN FLOOR(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_12, + CASE + WHEN FLOOR(0.1 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() 
OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_13, + CASE + WHEN FLOOR(0.01 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_14, + CASE + WHEN FLOOR(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_15, + CASE + WHEN FLOOR(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_9 + FROM _s0 AS _s0 + JOIN tpch.region AS region + ON _s0.n_regionkey = region.r_regionkey + LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey +) +SELECT + ARBITRARY(r_name) AS region_name, + ARBITRARY(n_name) AS nation_name, + MIN(o_totalprice) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_15) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + n_nationkey +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quantile_test_1_trino.sql b/tests/test_sql_refsols/quantile_test_1_trino.sql new file mode 100644 index 000000000..3defe357f --- /dev/null +++ b/tests/test_sql_refsols/quantile_test_1_trino.sql @@ -0,0 +1,14 @@ +WITH _t0 AS ( + SELECT + CASE + WHEN FLOOR(0.3 * COUNT(o_totalprice) OVER ()) < ROW_NUMBER() OVER (ORDER BY o_totalprice DESC) + THEN o_totalprice + ELSE NULL + END AS expr_1 + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1998 +) +SELECT + MAX(expr_1) AS seventieth_order_price +FROM _t0 diff --git 
a/tests/test_sql_refsols/quantile_test_2_trino.sql b/tests/test_sql_refsols/quantile_test_2_trino.sql new file mode 100644 index 000000000..30fb6b89e --- /dev/null +++ b/tests/test_sql_refsols/quantile_test_2_trino.sql @@ -0,0 +1,81 @@ +WITH _s0 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation + ORDER BY + 1 NULLS FIRST + LIMIT 5 +), _s5 AS ( + SELECT + customer.c_nationkey, + orders.o_totalprice + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey +), _t1 AS ( + SELECT + _s0.n_name, + _s0.n_nationkey, + _s5.o_totalprice, + region.r_name, + CASE + WHEN FLOOR(0.99 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_10, + CASE + WHEN FLOOR(0.75 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_11, + CASE + WHEN FLOOR(0.25 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_12, + CASE + WHEN FLOOR(0.1 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_13, + CASE + WHEN FLOOR(0.01 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_14, + CASE + WHEN FLOOR(0.5 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE 
NULL + END AS expr_15, + CASE + WHEN FLOOR(0.9 * COUNT(_s5.o_totalprice) OVER (PARTITION BY _s5.c_nationkey)) < ROW_NUMBER() OVER (PARTITION BY _s5.c_nationkey ORDER BY _s5.o_totalprice DESC) + THEN _s5.o_totalprice + ELSE NULL + END AS expr_9 + FROM _s0 AS _s0 + JOIN tpch.region AS region + ON _s0.n_regionkey = region.r_regionkey + LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey +) +SELECT + ARBITRARY(r_name) AS region_name, + ARBITRARY(n_name) AS nation_name, + MIN(o_totalprice) AS orders_min, + MAX(expr_10) AS orders_1_percent, + MAX(expr_9) AS orders_10_percent, + MAX(expr_11) AS orders_25_percent, + MAX(expr_15) AS orders_median, + MAX(expr_12) AS orders_75_percent, + MAX(expr_13) AS orders_90_percent, + MAX(expr_14) AS orders_99_percent, + MAX(o_totalprice) AS orders_max +FROM _t1 +GROUP BY + n_nationkey +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/quarter_cum_ir_analysis_trino.sql b/tests/test_sql_refsols/quarter_cum_ir_analysis_trino.sql new file mode 100644 index 000000000..ddf8e46c7 --- /dev/null +++ b/tests/test_sql_refsols/quarter_cum_ir_analysis_trino.sql @@ -0,0 +1,83 @@ +WITH _t2 AS ( + SELECT + pr_name, + pr_release + FROM main.products + WHERE + pr_name = 'RubyCopper-Star' +), _s1 AS ( + SELECT + ca_dt + FROM main.calendar +), _s7 AS ( + SELECT + _s3.ca_dt, + COUNT(*) AS n_rows + FROM _t2 AS _t4 + JOIN _s1 AS _s3 + ON _s3.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t4.pr_release AS TIMESTAMP))) + AND _s3.ca_dt >= _t4.pr_release + JOIN main.devices AS devices + ON _s3.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + AND devices.de_product_id = 800544 + GROUP BY + 1 +), _s22 AS ( + SELECT + DATE_TRUNC('QUARTER', CAST(_s1.ca_dt AS TIMESTAMP)) AS quarter, + SUM(_s7.n_rows) AS sum_n_rows + FROM _t2 AS _t2 + JOIN _s1 AS _s1 + ON _s1.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t2.pr_release AS TIMESTAMP))) + AND _s1.ca_dt >= _t2.pr_release + LEFT JOIN _s7 AS _s7 + ON _s1.ca_dt = 
_s7.ca_dt + GROUP BY + 1 +), _s13 AS ( + SELECT DISTINCT + DATE_TRUNC('QUARTER', CAST(_s11.ca_dt AS TIMESTAMP)) AS quarter + FROM _t2 AS _t10 + JOIN _s1 AS _s11 + ON _s11.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t10.pr_release AS TIMESTAMP))) + AND _s11.ca_dt >= _t10.pr_release +), _s17 AS ( + SELECT + _s15.ca_dt + FROM _t2 AS _t11 + JOIN _s1 AS _s15 + ON _s15.ca_dt < DATE_TRUNC('QUARTER', DATE_ADD('YEAR', 2, CAST(_t11.pr_release AS TIMESTAMP))) + AND _s15.ca_dt >= _t11.pr_release +), _s23 AS ( + SELECT + _s13.quarter, + COUNT(DISTINCT incidents.in_device_id) AS ndistinct_in_device_id + FROM main.products AS products + JOIN main.countries AS countries + ON countries.co_name = 'CN' + CROSS JOIN _s13 AS _s13 + JOIN _s17 AS _s17 + ON _s13.quarter = DATE_TRUNC('QUARTER', CAST(_s17.ca_dt AS TIMESTAMP)) + JOIN main.incidents AS incidents + ON _s17.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + AND countries.co_id = incidents.in_repair_country_id + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id AND devices.de_product_id = 800544 + WHERE + products.pr_name = 'RubyCopper-Star' + GROUP BY + 1 +) +SELECT + _s22.quarter, + COALESCE(_s23.ndistinct_in_device_id, 0) AS n_incidents, + COALESCE(_s22.sum_n_rows, 0) AS n_sold, + ROUND( + CAST(SUM(COALESCE(_s23.ndistinct_in_device_id, 0)) OVER (ORDER BY _s22.quarter ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE) / SUM(COALESCE(_s22.sum_n_rows, 0)) OVER (ORDER BY _s22.quarter ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS quarter_cum +FROM _s22 AS _s22 +LEFT JOIN _s23 AS _s23 + ON _s22.quarter = _s23.quarter +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/quarter_function_test_trino.sql b/tests/test_sql_refsols/quarter_function_test_trino.sql new file mode 100644 index 000000000..e0a49ab13 --- /dev/null +++ b/tests/test_sql_refsols/quarter_function_test_trino.sql @@ -0,0 +1,58 @@ +SELECT + 1 AS _expr0, + 1 AS _expr1, + 1 AS 
_expr2, + 2 AS _expr3, + 2 AS _expr4, + 2 AS _expr5, + 3 AS _expr6, + 3 AS _expr7, + 3 AS _expr8, + 4 AS _expr9, + 4 AS _expr10, + 4 AS _expr11, + 1 AS _expr12, + CAST('2023-01-01' AS DATE) AS q1_jan, + CAST('2023-01-01' AS DATE) AS q1_feb, + CAST('2023-01-01' AS DATE) AS q1_mar, + CAST('2023-04-01' AS DATE) AS q2_apr, + CAST('2023-04-01' AS DATE) AS q2_may, + CAST('2023-04-01' AS DATE) AS q2_jun, + CAST('2023-07-01' AS DATE) AS q3_jul, + CAST('2023-07-01' AS DATE) AS q3_aug, + CAST('2023-07-01' AS DATE) AS q3_sep, + CAST('2023-10-01' AS DATE) AS q4_oct, + CAST('2023-10-01' AS DATE) AS q4_nov, + CAST('2023-10-01' AS DATE) AS q4_dec, + CAST('2024-01-01' AS DATE) AS ts_q1, + CAST('2023-04-01' AS DATE) AS alias1, + CAST('2023-07-01' AS DATE) AS alias2, + CAST('2023-10-01' AS DATE) AS alias3, + CAST('2023-01-01' AS DATE) AS alias4, + CAST('2023-04-02 02:00:00' AS TIMESTAMP) AS chain1, + CAST('2023-07-01' AS DATE) AS chain2, + CAST('2023-10-01' AS DATE) AS chain3, + CAST('2023-04-15 12:30:45' AS TIMESTAMP) AS plus_1q, + CAST('2023-07-15 12:30:45' AS TIMESTAMP) AS plus_2q, + CAST('2023-10-15' AS DATE) AS plus_3q, + CAST('2022-10-15 12:30:45' AS TIMESTAMP) AS minus_1q, + CAST('2022-07-15 12:30:45' AS TIMESTAMP) AS minus_2q, + CAST('2022-04-15' AS DATE) AS minus_3q, + CAST('2023-08-15' AS DATE) AS syntax1, + CAST('2024-02-15' AS DATE) AS syntax2, + CAST('2024-08-15' AS DATE) AS syntax3, + CAST('2022-08-15' AS DATE) AS syntax4, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS TIMESTAMP), CAST('2023-04-15' AS TIMESTAMP)) AS q_diff1, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS TIMESTAMP), CAST('2023-07-15' AS TIMESTAMP)) AS q_diff2, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS TIMESTAMP), CAST('2023-10-15' AS TIMESTAMP)) AS q_diff3, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS TIMESTAMP), CAST('2023-12-31' AS TIMESTAMP)) AS q_diff4, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS TIMESTAMP), CAST('2024-01-15' AS TIMESTAMP)) AS q_diff5, + DATE_DIFF('QUARTER', CAST('2023-01-15' AS 
TIMESTAMP), CAST('2024-04-15' AS TIMESTAMP)) AS q_diff6, + DATE_DIFF('QUARTER', CAST('2022-10-15' AS TIMESTAMP), CAST('2024-04-15' AS TIMESTAMP)) AS q_diff7, + DATE_DIFF('QUARTER', CAST('2020-01-01' AS TIMESTAMP), CAST('2025-01-01' AS TIMESTAMP)) AS q_diff8, + DATE_DIFF('QUARTER', CAST('2023-04-15' AS TIMESTAMP), CAST('2023-01-15' AS TIMESTAMP)) AS q_diff9, + DATE_DIFF('QUARTER', CAST('2024-01-15' AS TIMESTAMP), CAST('2023-01-15' AS TIMESTAMP)) AS q_diff10, + DATE_DIFF('QUARTER', CAST('2023-03-31' AS TIMESTAMP), CAST('2023-04-01' AS TIMESTAMP)) AS q_diff11, + DATE_DIFF('QUARTER', CAST('2023-12-31' AS TIMESTAMP), CAST('2024-01-01' AS TIMESTAMP)) AS q_diff12 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/rank_a_trino.sql b/tests/test_sql_refsols/rank_a_trino.sql new file mode 100644 index 000000000..4ff2c1007 --- /dev/null +++ b/tests/test_sql_refsols/rank_a_trino.sql @@ -0,0 +1,4 @@ +SELECT + c_custkey AS id, + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC NULLS FIRST) AS rk +FROM tpch.customer diff --git a/tests/test_sql_refsols/rank_b_trino.sql b/tests/test_sql_refsols/rank_b_trino.sql new file mode 100644 index 000000000..14229140a --- /dev/null +++ b/tests/test_sql_refsols/rank_b_trino.sql @@ -0,0 +1,4 @@ +SELECT + o_orderkey AS order_key, + RANK() OVER (ORDER BY o_orderpriority) AS rank +FROM tpch.orders diff --git a/tests/test_sql_refsols/rank_c_trino.sql b/tests/test_sql_refsols/rank_c_trino.sql new file mode 100644 index 000000000..159e7b9ba --- /dev/null +++ b/tests/test_sql_refsols/rank_c_trino.sql @@ -0,0 +1,4 @@ +SELECT + o_orderdate AS order_date, + DENSE_RANK() OVER (ORDER BY o_orderdate) AS rank +FROM tpch.orders diff --git a/tests/test_sql_refsols/rank_nations_by_region_trino.sql b/tests/test_sql_refsols/rank_nations_by_region_trino.sql new file mode 100644 index 000000000..25635cf4d --- /dev/null +++ b/tests/test_sql_refsols/rank_nations_by_region_trino.sql @@ -0,0 +1,6 @@ +SELECT + nation.n_name AS name, + RANK() OVER 
(ORDER BY region.r_name) AS rank +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/rank_nations_per_region_by_customers_trino.sql b/tests/test_sql_refsols/rank_nations_per_region_by_customers_trino.sql new file mode 100644 index 000000000..5df527d81 --- /dev/null +++ b/tests/test_sql_refsols/rank_nations_per_region_by_customers_trino.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + c_nationkey, + COUNT(*) AS n_rows + FROM tpch.customer + GROUP BY + 1 +) +SELECT + nation.n_name AS nation_name, + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY _s1.n_rows DESC NULLS FIRST, region.r_name) AS rank +FROM tpch.nation AS nation +JOIN _s1 AS _s1 + ON _s1.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey +ORDER BY + 2 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/rank_parts_per_supplier_region_by_size_trino.sql b/tests/test_sql_refsols/rank_parts_per_supplier_region_by_size_trino.sql new file mode 100644 index 000000000..1ed06a5a9 --- /dev/null +++ b/tests/test_sql_refsols/rank_parts_per_supplier_region_by_size_trino.sql @@ -0,0 +1,17 @@ +SELECT + part.p_partkey AS key, + region.r_name AS region, + DENSE_RANK() OVER (PARTITION BY nation.n_regionkey ORDER BY part.p_size DESC NULLS FIRST, part.p_container DESC NULLS FIRST, part.p_type DESC NULLS FIRST) AS rank +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN tpch.part AS part + ON part.p_partkey = partsupp.ps_partkey +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST +LIMIT 15 diff --git a/tests/test_sql_refsols/rank_with_filters_a_trino.sql b/tests/test_sql_refsols/rank_with_filters_a_trino.sql new file mode 100644 index 000000000..45fcb5264 --- /dev/null +++ 
b/tests/test_sql_refsols/rank_with_filters_a_trino.sql @@ -0,0 +1,12 @@ +WITH _t0 AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC NULLS FIRST, c_name) AS r, + c_name + FROM tpch.customer +) +SELECT + c_name AS n, + r +FROM _t0 +WHERE + c_name LIKE '%0' AND r <= 30 diff --git a/tests/test_sql_refsols/rank_with_filters_b_trino.sql b/tests/test_sql_refsols/rank_with_filters_b_trino.sql new file mode 100644 index 000000000..45fcb5264 --- /dev/null +++ b/tests/test_sql_refsols/rank_with_filters_b_trino.sql @@ -0,0 +1,12 @@ +WITH _t0 AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC NULLS FIRST, c_name) AS r, + c_name + FROM tpch.customer +) +SELECT + c_name AS n, + r +FROM _t0 +WHERE + c_name LIKE '%0' AND r <= 30 diff --git a/tests/test_sql_refsols/rank_with_filters_c_trino.sql b/tests/test_sql_refsols/rank_with_filters_c_trino.sql new file mode 100644 index 000000000..161cc3c15 --- /dev/null +++ b/tests/test_sql_refsols/rank_with_filters_c_trino.sql @@ -0,0 +1,22 @@ +WITH _s0 AS ( + SELECT DISTINCT + p_size + FROM tpch.part + ORDER BY + 1 DESC + LIMIT 5 +), _t AS ( + SELECT + part.p_size AS size_1, + part.p_name, + ROW_NUMBER() OVER (PARTITION BY _s0.p_size ORDER BY part.p_retailprice DESC NULLS FIRST, part.p_partkey) AS _w + FROM _s0 AS _s0 + JOIN tpch.part AS part + ON _s0.p_size = part.p_size +) +SELECT + p_name AS pname, + size_1 AS psize +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/redundant_has_nested_trino.sql b/tests/test_sql_refsols/redundant_has_nested_trino.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_trino.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_trino.sql 
b/tests/test_sql_refsols/redundant_has_not_on_singular_trino.sql new file mode 100644 index 000000000..431b9ed82 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + nation.n_nationkey AS _u_1 + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = supplier.s_nationkey +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_trino.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_trino.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_trino.sql b/tests/test_sql_refsols/redundant_has_on_plural_trino.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_trino.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_trino.sql b/tests/test_sql_refsols/redundant_has_singular_chain_trino.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_trino.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN 
tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_trino.sql b/tests/test_sql_refsols/redundant_has_trino.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_trino.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/region_acctbal_breakdown_trino.sql b/tests/test_sql_refsols/region_acctbal_breakdown_trino.sql new file mode 100644 index 000000000..999a9df2b --- /dev/null +++ b/tests/test_sql_refsols/region_acctbal_breakdown_trino.sql @@ -0,0 +1,70 @@ +WITH _t1 AS ( + SELECT + customer.c_acctbal, + nation.n_regionkey, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) OVER (PARTITION BY nation.n_regionkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END + ELSE NULL + END AS expr_5, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal DESC) - 1.0 + ) - ( + CAST(( + COUNT(customer.c_acctbal) OVER (PARTITION BY nation.n_regionkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN customer.c_acctbal + ELSE NULL + END AS expr_6, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END DESC) - 1.0 + ) - ( + CAST(( + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) 
OVER (PARTITION BY nation.n_regionkey) - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END + ELSE NULL + END AS expr_7 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +), _s3 AS ( + SELECT + n_regionkey, + AVG(expr_5) AS avg_expr_5, + AVG(expr_6) AS avg_expr_6, + AVG(expr_7) AS avg_expr_7, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS count_negative_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS count_non_negative_acctbal + FROM _t1 + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s3.count_negative_acctbal AS n_red_acctbal, + _s3.count_non_negative_acctbal AS n_black_acctbal, + _s3.avg_expr_7 AS median_red_acctbal, + _s3.avg_expr_5 AS median_black_acctbal, + _s3.avg_expr_6 AS median_overall_acctbal +FROM tpch.region AS region +JOIN _s3 AS _s3 + ON _s3.n_regionkey = region.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/region_nation_window_aggs_trino.sql b/tests/test_sql_refsols/region_nation_window_aggs_trino.sql new file mode 100644 index 000000000..9093c8cb9 --- /dev/null +++ b/tests/test_sql_refsols/region_nation_window_aggs_trino.sql @@ -0,0 +1,12 @@ +SELECT + n_name AS nation_name, + SUM(n_nationkey) OVER (PARTITION BY n_regionkey) AS key_sum, + AVG(CAST(n_nationkey AS DOUBLE)) OVER (PARTITION BY n_regionkey) AS key_avg, + COUNT(CASE WHEN LENGTH(n_comment) < 75 THEN n_comment ELSE NULL END) OVER (PARTITION BY n_regionkey) AS n_short_comment, + COUNT(*) OVER (PARTITION BY n_regionkey) AS n_nations +FROM tpch.nation +WHERE + NOT SUBSTRING(n_name, 1, 1) IN ('A', 'E', 'I', 'O', 'U') +ORDER BY + n_regionkey NULLS FIRST, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/region_orders_from_nations_richest_trino.sql b/tests/test_sql_refsols/region_orders_from_nations_richest_trino.sql new file mode 100644 index 000000000..10d8e291c --- /dev/null +++ 
b/tests/test_sql_refsols/region_orders_from_nations_richest_trino.sql @@ -0,0 +1,35 @@ +WITH _t AS ( + SELECT + customer.c_custkey, + nation.n_regionkey, + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal DESC NULLS FIRST, customer.c_name) AS _w + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +), _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + _t.n_regionkey, + SUM(_s3.n_rows) AS sum_n_rows + FROM _t AS _t + JOIN _s3 AS _s3 + ON _s3.o_custkey = _t.c_custkey + WHERE + _t._w = 1 + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + COALESCE(_s5.sum_n_rows, 0) AS n_orders +FROM tpch.region AS region +LEFT JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/regional_first_order_best_line_part_trino.sql b/tests/test_sql_refsols/regional_first_order_best_line_part_trino.sql new file mode 100644 index 000000000..61a377436 --- /dev/null +++ b/tests/test_sql_refsols/regional_first_order_best_line_part_trino.sql @@ -0,0 +1,40 @@ +WITH _t AS ( + SELECT + nation.n_regionkey, + orders.o_orderkey, + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY orders.o_orderdate, orders.o_orderkey) AS _w + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1992 + AND customer.c_custkey = orders.o_custkey +), _t_2 AS ( + SELECT + lineitem.l_partkey, + _t.n_regionkey, + ROW_NUMBER() OVER (PARTITION BY _t.n_regionkey ORDER BY lineitem.l_quantity DESC NULLS FIRST, lineitem.l_linenumber) AS _w + FROM _t AS _t + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1992 + AND _t.o_orderkey = lineitem.l_orderkey + WHERE + _t._w = 1 +), _s9 AS ( + SELECT + _t.n_regionkey, + part.p_name + FROM _t_2 AS 
_t + JOIN tpch.part AS part + ON _t.l_partkey = part.p_partkey + WHERE + _t._w = 1 +) +SELECT + region.r_name AS region_name, + _s9.p_name AS part_name +FROM tpch.region AS region +LEFT JOIN _s9 AS _s9 + ON _s9.n_regionkey = region.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/regional_suppliers_percentile_trino.sql b/tests/test_sql_refsols/regional_suppliers_percentile_trino.sql new file mode 100644 index 000000000..f451b4cf7 --- /dev/null +++ b/tests/test_sql_refsols/regional_suppliers_percentile_trino.sql @@ -0,0 +1,22 @@ +WITH _s3 AS ( + SELECT + ps_suppkey, + COUNT(*) AS n_rows + FROM tpch.partsupp + GROUP BY + 1 +), _t AS ( + SELECT + supplier.s_name, + NTILE(1000) OVER (PARTITION BY nation.n_regionkey ORDER BY _s3.n_rows, supplier.s_name) AS _w + FROM tpch.nation AS nation + JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey + JOIN _s3 AS _s3 + ON _s3.ps_suppkey = supplier.s_suppkey +) +SELECT + s_name AS name +FROM _t +WHERE + _w = 1000 diff --git a/tests/test_sql_refsols/richest_customer_key_per_region_trino.sql b/tests/test_sql_refsols/richest_customer_key_per_region_trino.sql new file mode 100644 index 000000000..59e462b7b --- /dev/null +++ b/tests/test_sql_refsols/richest_customer_key_per_region_trino.sql @@ -0,0 +1,13 @@ +WITH _t AS ( + SELECT + customer.c_custkey, + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal DESC NULLS FIRST) AS _w + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +) +SELECT + c_custkey AS key +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/richest_customer_per_region_trino.sql b/tests/test_sql_refsols/richest_customer_per_region_trino.sql new file mode 100644 index 000000000..2c1199710 --- /dev/null +++ b/tests/test_sql_refsols/richest_customer_per_region_trino.sql @@ -0,0 +1,21 @@ +WITH _t AS ( + SELECT + customer.c_acctbal, + customer.c_name, + nation.n_name, + 
region.r_name, + ROW_NUMBER() OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal DESC NULLS FIRST, customer.c_name) AS _w + FROM tpch.region AS region + JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +) +SELECT + r_name AS region_name, + n_name AS nation_name, + c_name AS customer_name, + c_acctbal AS balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/simple_cross_10_trino.sql b/tests/test_sql_refsols/simple_cross_10_trino.sql new file mode 100644 index 000000000..211b59c14 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_10_trino.sql @@ -0,0 +1,26 @@ +WITH _s4 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s5 AS ( + SELECT + _s0.r_regionkey, + COUNT(*) AS n_rows + FROM _s4 AS _s0 + JOIN _s4 AS _s1 + ON _s0.r_name <> _s1.r_name + JOIN tpch.nation AS nation + ON SUBSTRING(_s0.r_name, 1, 1) = SUBSTRING(nation.n_name, 1, 1) + AND _s1.r_regionkey = nation.n_regionkey + GROUP BY + 1 +) +SELECT + _s4.r_name AS region_name, + COALESCE(_s5.n_rows, 0) AS n_other_nations +FROM _s4 AS _s4 +LEFT JOIN _s5 AS _s5 + ON _s4.r_regionkey = _s5.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_11_trino.sql b/tests/test_sql_refsols/simple_cross_11_trino.sql new file mode 100644 index 000000000..5ee3e6ed2 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_11_trino.sql @@ -0,0 +1,14 @@ +WITH _s0 AS ( + SELECT + o_orderdate + FROM tpch.orders +), _s1 AS ( + SELECT + MIN(o_orderdate) AS min_o_orderdate + FROM _s0 +) +SELECT + COUNT(*) AS n +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON _s0.o_orderdate = _s1.min_o_orderdate diff --git a/tests/test_sql_refsols/simple_cross_12_trino.sql b/tests/test_sql_refsols/simple_cross_12_trino.sql new file mode 100644 index 000000000..042c5fe86 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_12_trino.sql @@ -0,0 +1,17 @@ +WITH _s0 AS ( + SELECT DISTINCT 
+ o_orderpriority + FROM tpch.orders +), _s1 AS ( + SELECT DISTINCT + c_mktsegment + FROM tpch.customer +) +SELECT + _s0.o_orderpriority AS order_priority, + _s1.c_mktsegment AS market_segment +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_13_trino.sql b/tests/test_sql_refsols/simple_cross_13_trino.sql new file mode 100644 index 000000000..592b51f25 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_13_trino.sql @@ -0,0 +1,10 @@ +SELECT + 'foo' AS a, + 'bar' AS b, + 'fizz' AS c, + 'buzz' AS d, + 'foobar' AS e, + 'fizzbuzz' AS f, + 'yay' AS g +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simple_cross_14_trino.sql b/tests/test_sql_refsols/simple_cross_14_trino.sql new file mode 100644 index 000000000..8faae3dc7 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_14_trino.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + n_regionkey, + COUNT(*) AS n_rows + FROM tpch.nation + WHERE + SUBSTRING(n_name, 1, 1) IN ('A', 'B', 'C') + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + 'foo' AS x, + COALESCE(_s1.n_rows, 0) AS n +FROM tpch.region AS region +LEFT JOIN _s1 AS _s1 + ON _s1.n_regionkey = region.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_15_trino.sql b/tests/test_sql_refsols/simple_cross_15_trino.sql new file mode 100644 index 000000000..6bfdf9979 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_15_trino.sql @@ -0,0 +1,35 @@ +WITH _t1 AS ( + SELECT + r_name + FROM tpch.region +), _s0 AS ( + SELECT DISTINCT + IF(r_name LIKE '%A%', 'A', '*') AS a + FROM _t1 +), _s1 AS ( + SELECT DISTINCT + IF(r_name LIKE '%E%', 'E', '*') AS e + FROM _t1 +), _s3 AS ( + SELECT DISTINCT + IF(r_name LIKE '%I%', 'I', '*') AS i + FROM _t1 +), _s5 AS ( + SELECT DISTINCT + IF(r_name LIKE '%O%', 'O', '*') AS o + FROM _t1 +) +SELECT + _s0.a, + _s1.e, + _s3.i, + _s5.o +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +CROSS JOIN 
_s3 AS _s3 +CROSS JOIN _s5 AS _s5 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST, + 3 NULLS FIRST, + 4 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_16_trino.sql b/tests/test_sql_refsols/simple_cross_16_trino.sql new file mode 100644 index 000000000..c43455771 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_16_trino.sql @@ -0,0 +1,38 @@ +WITH _s0 AS ( + SELECT + c_acctbal + FROM tpch.customer +), _s1 AS ( + SELECT + MIN(c_acctbal) AS min_c_acctbal + FROM _s0 +), _s4 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.c_acctbal <= ( + _s1.min_c_acctbal + 10.0 + ) +), _s2 AS ( + SELECT + s_acctbal + FROM tpch.supplier +), _s3 AS ( + SELECT + MAX(s_acctbal) AS max_s_acctbal + FROM _s2 +), _s5 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s2 AS _s2 + JOIN _s3 AS _s3 + ON _s2.s_acctbal >= ( + _s3.max_s_acctbal - 10.0 + ) +) +SELECT + _s4.n_rows AS n1, + _s5.n_rows AS n2 +FROM _s4 AS _s4 +CROSS JOIN _s5 AS _s5 diff --git a/tests/test_sql_refsols/simple_cross_1_trino.sql b/tests/test_sql_refsols/simple_cross_1_trino.sql new file mode 100644 index 000000000..c1dc8969a --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_1_trino.sql @@ -0,0 +1,13 @@ +WITH _s0 AS ( + SELECT + r_name + FROM tpch.region +) +SELECT + _s0.r_name AS r1, + _s1.r_name AS r2 +FROM _s0 AS _s0 +CROSS JOIN _s0 AS _s1 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_2_trino.sql b/tests/test_sql_refsols/simple_cross_2_trino.sql new file mode 100644 index 000000000..384a16c64 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_2_trino.sql @@ -0,0 +1,14 @@ +WITH _s0 AS ( + SELECT + r_name + FROM tpch.region +) +SELECT + _s0.r_name AS r1, + _s1.r_name AS r2 +FROM _s0 AS _s0 +JOIN _s0 AS _s1 + ON _s0.r_name <> _s1.r_name +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_3_trino.sql b/tests/test_sql_refsols/simple_cross_3_trino.sql new file mode 100644 index 
000000000..91cdf22bf --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_3_trino.sql @@ -0,0 +1,36 @@ +WITH _s1 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation +) +SELECT + ARBITRARY(_s1.n_name) AS supplier_nation, + ARBITRARY(_s5.n_name) AS customer_nation, + COUNT(*) AS nation_combinations +FROM tpch.region AS region +JOIN _s1 AS _s1 + ON _s1.n_regionkey = region.r_regionkey +JOIN tpch.region AS region_2 + ON region_2.r_name = 'AMERICA' +JOIN _s1 AS _s5 + ON _s5.n_regionkey = region_2.r_regionkey +JOIN tpch.customer AS customer + ON _s5.n_nationkey = customer.c_nationkey AND customer.c_acctbal < 0 +JOIN tpch.orders AS orders + ON MONTH(CAST(orders.o_orderdate AS TIMESTAMP)) = 4 + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1992 + AND customer.c_custkey = orders.o_custkey +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey AND lineitem.l_shipmode = 'SHIP' +JOIN tpch.supplier AS supplier + ON _s1.n_nationkey = supplier.s_nationkey + AND lineitem.l_suppkey = supplier.s_suppkey +WHERE + region.r_name = 'ASIA' +GROUP BY + _s1.n_nationkey, + _s5.n_nationkey, + region.r_regionkey, + region_2.r_regionkey diff --git a/tests/test_sql_refsols/simple_cross_4_trino.sql b/tests/test_sql_refsols/simple_cross_4_trino.sql new file mode 100644 index 000000000..60d8e4c90 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_4_trino.sql @@ -0,0 +1,24 @@ +WITH _s2 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s3 AS ( + SELECT + _s0.r_regionkey, + COUNT(*) AS n_rows + FROM _s2 AS _s0 + JOIN tpch.region AS region + ON SUBSTRING(_s0.r_name, 1, 1) = SUBSTRING(region.r_name, 1, 1) + AND _s0.r_name <> region.r_name + GROUP BY + 1 +) +SELECT + _s2.r_name AS region_name, + COALESCE(_s3.n_rows, 0) AS n_other_regions +FROM _s2 AS _s2 +LEFT JOIN _s3 AS _s3 + ON _s2.r_regionkey = _s3.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_5_trino.sql 
b/tests/test_sql_refsols/simple_cross_5_trino.sql new file mode 100644 index 000000000..98549975e --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_5_trino.sql @@ -0,0 +1,79 @@ +WITH _t1 AS ( + SELECT + p_container, + p_size + FROM tpch.part + WHERE + STARTS_WITH(p_container, 'LG') +), _s6 AS ( + SELECT DISTINCT + p_size + FROM _t1 + ORDER BY + 1 NULLS FIRST + LIMIT 10 +), _s0 AS ( + SELECT DISTINCT + p_size + FROM _t1 + ORDER BY + 1 NULLS FIRST + LIMIT 10 +), _t4 AS ( + SELECT + orders.o_orderpriority, + _s0.p_size, + SUM(lineitem.l_quantity) AS sum_l_quantity + FROM _s0 AS _s0 + JOIN tpch.orders AS orders + ON MONTH(CAST(orders.o_orderdate AS TIMESTAMP)) = 1 + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + JOIN tpch.lineitem AS lineitem + ON lineitem.l_discount = 0 + AND lineitem.l_orderkey = orders.o_orderkey + AND lineitem.l_shipmode = 'SHIP' + AND lineitem.l_tax = 0 + JOIN tpch.part AS part + ON STARTS_WITH(part.p_container, 'LG') + AND _s0.p_size = part.p_size + AND lineitem.l_partkey = part.p_partkey + GROUP BY + 1, + 2 +), _t AS ( + SELECT + o_orderpriority, + p_size, + sum_l_quantity, + ROW_NUMBER() OVER (PARTITION BY p_size ORDER BY CASE + WHEN ( + NOT sum_l_quantity IS NULL AND sum_l_quantity > 0 + ) + THEN COALESCE(sum_l_quantity, 0) + ELSE NULL + END DESC NULLS FIRST) AS _w + FROM _t4 +), _s7 AS ( + SELECT + CASE + WHEN ( + NOT sum_l_quantity IS NULL AND sum_l_quantity > 0 + ) + THEN COALESCE(sum_l_quantity, 0) + ELSE NULL + END AS total_qty, + o_orderpriority, + p_size + FROM _t + WHERE + _w = 1 +) +SELECT + _s6.p_size AS part_size, + _s7.o_orderpriority AS best_order_priority, + _s7.total_qty AS best_order_priority_qty +FROM _s6 AS _s6 +LEFT JOIN _s7 AS _s7 + ON _s6.p_size = _s7.p_size +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_cross_6_trino.sql b/tests/test_sql_refsols/simple_cross_6_trino.sql new file mode 100644 index 000000000..ec238d24b --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_6_trino.sql 
@@ -0,0 +1,17 @@ +WITH _t1 AS ( + SELECT + c_acctbal, + c_custkey, + c_mktsegment, + c_nationkey + FROM tpch.customer + WHERE + c_acctbal > 9990 +) +SELECT + COUNT(*) AS n_pairs +FROM _t1 AS _t1 +JOIN _t1 AS _t2 + ON _t1.c_custkey < _t2.c_custkey + AND _t1.c_mktsegment = _t2.c_mktsegment + AND _t1.c_nationkey = _t2.c_nationkey diff --git a/tests/test_sql_refsols/simple_cross_7_trino.sql b/tests/test_sql_refsols/simple_cross_7_trino.sql new file mode 100644 index 000000000..fdf6cddde --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_7_trino.sql @@ -0,0 +1,32 @@ +WITH _s3 AS ( + SELECT + part.p_partkey, + COUNT(*) AS n_rows + FROM tpch.part AS part + JOIN tpch.part AS part_2 + ON ABS(part_2.p_retailprice - part.p_retailprice) < 5.0 + AND part.p_brand = part_2.p_brand + AND part.p_mfgr = part_2.p_mfgr + AND part.p_partkey < part_2.p_partkey + AND part_2.p_name LIKE '%tomato%' + WHERE + part.p_brand = 'Brand#35' + AND part.p_mfgr = 'Manufacturer#3' + AND part.p_name LIKE '%tomato%' + GROUP BY + 1 +) +SELECT + part.p_partkey AS original_part_key, + COALESCE(_s3.n_rows, 0) AS n_other_parts +FROM tpch.part AS part +LEFT JOIN _s3 AS _s3 + ON _s3.p_partkey = part.p_partkey +WHERE + part.p_brand = 'Brand#35' + AND part.p_mfgr = 'Manufacturer#3' + AND part.p_name LIKE '%tomato%' +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/simple_cross_8_trino.sql b/tests/test_sql_refsols/simple_cross_8_trino.sql new file mode 100644 index 000000000..d162cca46 --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_8_trino.sql @@ -0,0 +1,43 @@ +WITH _s0 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s3 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +), _s15 AS ( + SELECT + _s13.r_name, + supplier.s_suppkey + FROM tpch.supplier AS supplier + JOIN _s3 AS _s11 + ON _s11.n_nationkey = supplier.s_nationkey + JOIN _s0 AS _s13 + ON _s11.n_regionkey = _s13.r_regionkey + WHERE + supplier.s_acctbal < 0 +) +SELECT + 
ARBITRARY(_s0.r_name) AS supplier_region, + ARBITRARY(_s1.r_name) AS customer_region, + COUNT(*) AS region_combinations +FROM _s0 AS _s0 +CROSS JOIN _s0 AS _s1 +JOIN _s3 AS _s3 + ON _s1.r_regionkey = _s3.n_regionkey +JOIN tpch.customer AS customer + ON _s3.n_nationkey = customer.c_nationkey AND customer.c_mktsegment = 'AUTOMOBILE' +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_clerk = 'Clerk#000000007' +JOIN tpch.lineitem AS lineitem + ON MONTH(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 3 + AND YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1998 + AND lineitem.l_orderkey = orders.o_orderkey +JOIN _s15 AS _s15 + ON _s0.r_name = _s15.r_name AND _s15.s_suppkey = lineitem.l_suppkey +GROUP BY + _s0.r_regionkey, + _s1.r_regionkey diff --git a/tests/test_sql_refsols/simple_cross_9_trino.sql b/tests/test_sql_refsols/simple_cross_9_trino.sql new file mode 100644 index 000000000..89e18b38c --- /dev/null +++ b/tests/test_sql_refsols/simple_cross_9_trino.sql @@ -0,0 +1,25 @@ +WITH _s0 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s1 AS ( + SELECT + n_name, + n_regionkey + FROM tpch.nation +) +SELECT + _s1.n_name AS n1, + _s5.n_name AS n2 +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON _s0.r_regionkey = _s1.n_regionkey +JOIN _s0 AS _s3 + ON _s0.r_name = _s3.r_name +JOIN _s1 AS _s5 + ON _s1.n_name <> _s5.n_name AND _s3.r_regionkey = _s5.n_regionkey +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/simple_dataframe_collection_1_trino.sql b/tests/test_sql_refsols/simple_dataframe_collection_1_trino.sql new file mode 100644 index 000000000..f8f2781fc --- /dev/null +++ b/tests/test_sql_refsols/simple_dataframe_collection_1_trino.sql @@ -0,0 +1,12 @@ +SELECT + rainbow.idx, + rainbow.color +FROM (VALUES + (0, 'red'), + (1, 'orange'), + (2, 'yellow'), + (3, 'green'), + (4, 'blue'), + (5, 'indigo'), + (6, 'violet'), + (7, NULL)) AS rainbow(idx, color) diff --git 
a/tests/test_sql_refsols/simple_dataframe_collection_2_trino.sql b/tests/test_sql_refsols/simple_dataframe_collection_2_trino.sql new file mode 100644 index 000000000..b07077509 --- /dev/null +++ b/tests/test_sql_refsols/simple_dataframe_collection_2_trino.sql @@ -0,0 +1,8 @@ +SELECT + users.signup_date, + users.user_id +FROM (VALUES + (CAST('2024-01-10 00:00:00' AS TIMESTAMP), 1), + (CAST('2024-01-12 00:00:00' AS TIMESTAMP), 2), + (CAST('2024-02-01 00:00:00' AS TIMESTAMP), 3), + (CAST('2024-02-01 00:00:00' AS TIMESTAMP), 4)) AS users(signup_date, user_id) diff --git a/tests/test_sql_refsols/simple_dataframe_collection_3_trino.sql b/tests/test_sql_refsols/simple_dataframe_collection_3_trino.sql new file mode 100644 index 000000000..db8096433 --- /dev/null +++ b/tests/test_sql_refsols/simple_dataframe_collection_3_trino.sql @@ -0,0 +1,10 @@ +SELECT + users.user_id, + users."`name""[", + users."space country", + users."cast" AS "CAST" +FROM (VALUES + (1, 'Alice', 'US', 25), + (2, 'Bob', 'CR', 30), + (3, 'Charlie', 'US', 22), + (4, 'David', 'MX', 30)) AS users(user_id, "`name""[", "space country", "cast") diff --git a/tests/test_sql_refsols/simple_dataframe_collection_4_trino.sql b/tests/test_sql_refsols/simple_dataframe_collection_4_trino.sql new file mode 100644 index 000000000..b48a11971 --- /dev/null +++ b/tests/test_sql_refsols/simple_dataframe_collection_4_trino.sql @@ -0,0 +1,8 @@ +SELECT + users.user_id, + users.country +FROM (VALUES + (1, 'US'), + (2, 'CR'), + (3, 'US'), + (4, 'MX')) AS users(user_id, country) diff --git a/tests/test_sql_refsols/simple_filter_top_five_trino.sql b/tests/test_sql_refsols/simple_filter_top_five_trino.sql new file mode 100644 index 000000000..c97abdfd4 --- /dev/null +++ b/tests/test_sql_refsols/simple_filter_top_five_trino.sql @@ -0,0 +1,8 @@ +SELECT + o_orderkey AS key +FROM tpch.orders +WHERE + o_totalprice < 1000.0 +ORDER BY + 1 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/simple_filter_trino.sql 
b/tests/test_sql_refsols/simple_filter_trino.sql new file mode 100644 index 000000000..b3caa6ec4 --- /dev/null +++ b/tests/test_sql_refsols/simple_filter_trino.sql @@ -0,0 +1,7 @@ +SELECT + o_orderdate AS order_date, + o_orderkey, + o_totalprice +FROM tpch.orders +WHERE + o_totalprice < 1000.0 diff --git a/tests/test_sql_refsols/simple_int_float_string_cast_trino.sql b/tests/test_sql_refsols/simple_int_float_string_cast_trino.sql new file mode 100644 index 000000000..9a879f430 --- /dev/null +++ b/tests/test_sql_refsols/simple_int_float_string_cast_trino.sql @@ -0,0 +1,25 @@ +SELECT + 1 AS i1, + CAST(2.2 AS BIGINT) AS i2, + CAST('3' AS BIGINT) AS i3, + CAST('4.3' AS BIGINT) AS i4, + CAST('-5.888' AS BIGINT) AS i5, + CAST(-6.0 AS BIGINT) AS i6, + 1.0 AS f1, + 2.2 AS f2, + CAST('3' AS DOUBLE) AS f3, + CAST('4.3' AS DOUBLE) AS f4, + CAST('-5.888' AS DOUBLE) AS f5, + -6.0 AS f6, + 0.0 AS f7, + CAST(1 AS VARCHAR) AS s1, + CAST(2.2 AS VARCHAR) AS s2, + '3' AS s3, + '4.3' AS s4, + '-5.888' AS s5, + CAST(-6.1 AS VARCHAR) AS s6, + CAST(0.1 AS VARCHAR) AS s7, + '0.0' AS s8, + 'abc def' AS s9 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simple_range_1_trino.sql b/tests/test_sql_refsols/simple_range_1_trino.sql new file mode 100644 index 000000000..aa287b5b4 --- /dev/null +++ b/tests/test_sql_refsols/simple_range_1_trino.sql @@ -0,0 +1,13 @@ +SELECT + simple_range.value +FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS simple_range(value) diff --git a/tests/test_sql_refsols/simple_range_2_trino.sql b/tests/test_sql_refsols/simple_range_2_trino.sql new file mode 100644 index 000000000..ea71c6fc9 --- /dev/null +++ b/tests/test_sql_refsols/simple_range_2_trino.sql @@ -0,0 +1,15 @@ +SELECT + simple_range.value +FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS simple_range(value) +ORDER BY + 1 DESC diff --git a/tests/test_sql_refsols/simple_range_3_trino.sql 
b/tests/test_sql_refsols/simple_range_3_trino.sql new file mode 100644 index 000000000..d443ae57b --- /dev/null +++ b/tests/test_sql_refsols/simple_range_3_trino.sql @@ -0,0 +1,10 @@ +SELECT + t1.foo +FROM (VALUES + (15), + (16), + (17), + (18), + (19)) AS t1(foo) +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_range_4_trino.sql b/tests/test_sql_refsols/simple_range_4_trino.sql new file mode 100644 index 000000000..b2c4fb185 --- /dev/null +++ b/tests/test_sql_refsols/simple_range_4_trino.sql @@ -0,0 +1,15 @@ +SELECT + t2.n AS N +FROM (VALUES + (10), + (9), + (8), + (7), + (6), + (5), + (4), + (3), + (2), + (1)) AS t2(n) +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_range_5_trino.sql b/tests/test_sql_refsols/simple_range_5_trino.sql new file mode 100644 index 000000000..cd81c843d --- /dev/null +++ b/tests/test_sql_refsols/simple_range_5_trino.sql @@ -0,0 +1,4 @@ +SELECT + CAST(NULL AS INTEGER) AS x +WHERE + FALSE diff --git a/tests/test_sql_refsols/simple_range_6_trino.sql b/tests/test_sql_refsols/simple_range_6_trino.sql new file mode 100644 index 000000000..176c828bc --- /dev/null +++ b/tests/test_sql_refsols/simple_range_6_trino.sql @@ -0,0 +1,45 @@ +SELECT + CAST(SUBSTRING(CAST(tbl.v AS VARCHAR), 1, 1) AS BIGINT) AS first_digit, + COUNT(*) AS n +FROM (VALUES + (0), + (13), + (26), + (39), + (52), + (65), + (78), + (91), + (104), + (117), + (130), + (143), + (156), + (169), + (182), + (195), + (208), + (221), + (234), + (247), + (260), + (273), + (286), + (299), + (312), + (325), + (338), + (351), + (364), + (377), + (390), + (403), + (416), + (429), + (442), + (455), + (468), + (481), + (494)) AS tbl(v) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/simple_range_7_trino.sql b/tests/test_sql_refsols/simple_range_7_trino.sql new file mode 100644 index 000000000..e426dc011 --- /dev/null +++ b/tests/test_sql_refsols/simple_range_7_trino.sql @@ -0,0 +1,20 @@ +SELECT + d1.x + d2.y AS s, + COUNT(*) AS n, + AVG(CAST(d1.x * 
d2.y AS DOUBLE)) AS a +FROM (VALUES + (1), + (2), + (3), + (4), + (5), + (6)) AS d1(x) +CROSS JOIN (VALUES + (1), + (2), + (3), + (4), + (5), + (6)) AS d2(y) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/simple_range_8_trino.sql b/tests/test_sql_refsols/simple_range_8_trino.sql new file mode 100644 index 000000000..a189b8429 --- /dev/null +++ b/tests/test_sql_refsols/simple_range_8_trino.sql @@ -0,0 +1,23 @@ +SELECT + ( + d1.x + d2.y + ) + d3.z AS s, + COUNT(*) AS n, + AVG(d1.x * d2.y * d3.z) AS a +FROM (VALUES + (1), + (2), + (3), + (4)) AS d1(x) +CROSS JOIN (VALUES + (1), + (2), + (3), + (4)) AS d2(y) +CROSS JOIN (VALUES + (1), + (2), + (3), + (4)) AS d3(z) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/simple_range_9_trino.sql b/tests/test_sql_refsols/simple_range_9_trino.sql new file mode 100644 index 000000000..ee28f1d4e --- /dev/null +++ b/tests/test_sql_refsols/simple_range_9_trino.sql @@ -0,0 +1,8 @@ +SELECT + "quoted-name"."name space" +FROM (VALUES + (0), + (1), + (2), + (3), + (4)) AS "quoted-name"("name space") diff --git a/tests/test_sql_refsols/simple_scan_top_five_trino.sql b/tests/test_sql_refsols/simple_scan_top_five_trino.sql new file mode 100644 index 000000000..ab9c3c5d2 --- /dev/null +++ b/tests/test_sql_refsols/simple_scan_top_five_trino.sql @@ -0,0 +1,6 @@ +SELECT + o_orderkey AS key +FROM tpch.orders +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/simple_scan_trino.sql b/tests/test_sql_refsols/simple_scan_trino.sql new file mode 100644 index 000000000..4a2aac94f --- /dev/null +++ b/tests/test_sql_refsols/simple_scan_trino.sql @@ -0,0 +1,3 @@ +SELECT + o_orderkey AS key +FROM tpch.orders diff --git a/tests/test_sql_refsols/simple_smallest_or_largest_trino.sql b/tests/test_sql_refsols/simple_smallest_or_largest_trino.sql new file mode 100644 index 000000000..bc75fa2b4 --- /dev/null +++ b/tests/test_sql_refsols/simple_smallest_or_largest_trino.sql @@ -0,0 +1,29 @@ +SELECT + LEAST(20, 10) AS s1, + LEAST(20, 20) AS 
s2, + LEAST(20, 10, 0) AS s3, + LEAST(20, 10, 10, -1, -2, 100, -200) AS s4, + NULL AS s5, + LEAST(20.22, 10.22, -0.34) AS s6, + LEAST( + CAST('2025-01-01 00:00:00' AS TIMESTAMP), + CAST('2024-01-01 00:00:00' AS TIMESTAMP), + CAST('2023-01-01 00:00:00' AS TIMESTAMP) + ) AS s7, + LEAST('', 'alphabet soup', 'Hello World') AS s8, + NULL AS s9, + GREATEST(20, 10) AS l1, + GREATEST(20, 20) AS l2, + GREATEST(20, 10, 0) AS l3, + GREATEST(20, 10, 10, -1, -2, 100, -200, 300) AS l4, + NULL AS l5, + GREATEST(20.22, 100.22, -0.34) AS l6, + GREATEST( + CAST('2025-01-01 00:00:00' AS TIMESTAMP), + CAST('2024-01-01 00:00:00' AS TIMESTAMP), + CAST('2023-01-01 00:00:00' AS TIMESTAMP) + ) AS l7, + GREATEST('', 'alphabet soup', 'Hello World') AS l8, + NULL AS l9 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simple_var_std_trino.sql b/tests/test_sql_refsols/simple_var_std_trino.sql new file mode 100644 index 000000000..1e1568ad8 --- /dev/null +++ b/tests/test_sql_refsols/simple_var_std_trino.sql @@ -0,0 +1,15 @@ +SELECT + ARBITRARY(nation.n_name) AS name, + VAR_POP(supplier.s_acctbal) AS var, + STDDEV_POP(supplier.s_acctbal) AS std, + VARIANCE(supplier.s_acctbal) AS sample_var, + STDDEV(supplier.s_acctbal) AS sample_std, + VAR_POP(supplier.s_acctbal) AS pop_var, + STDDEV_POP(supplier.s_acctbal) AS pop_std +FROM tpch.nation AS nation +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey +WHERE + nation.n_name IN ('ALGERIA', 'ARGENTINA') +GROUP BY + supplier.s_nationkey diff --git a/tests/test_sql_refsols/simple_var_std_with_nulls_trino.sql b/tests/test_sql_refsols/simple_var_std_with_nulls_trino.sql new file mode 100644 index 000000000..55ede1abb --- /dev/null +++ b/tests/test_sql_refsols/simple_var_std_with_nulls_trino.sql @@ -0,0 +1,16 @@ +SELECT + VARIANCE(CASE WHEN c_custkey > 3 THEN c_acctbal ELSE NULL END) AS var_samp_0_nnull, + VARIANCE(CASE WHEN c_custkey > 2 THEN c_acctbal ELSE NULL END) AS var_samp_1_nnull, + VARIANCE(CASE 
WHEN c_custkey > 1 THEN c_acctbal ELSE NULL END) AS var_samp_2_nnull, + VAR_POP(CASE WHEN c_custkey > 3 THEN c_acctbal ELSE NULL END) AS var_pop_0_nnull, + VAR_POP(CASE WHEN c_custkey > 2 THEN c_acctbal ELSE NULL END) AS var_pop_1_nnull, + VAR_POP(CASE WHEN c_custkey > 1 THEN c_acctbal ELSE NULL END) AS var_pop_2_nnull, + STDDEV(CASE WHEN c_custkey > 3 THEN c_acctbal ELSE NULL END) AS std_samp_0_nnull, + STDDEV(CASE WHEN c_custkey > 2 THEN c_acctbal ELSE NULL END) AS std_samp_1_nnull, + STDDEV(CASE WHEN c_custkey > 1 THEN c_acctbal ELSE NULL END) AS std_samp_2_nnull, + STDDEV_POP(CASE WHEN c_custkey > 3 THEN c_acctbal ELSE NULL END) AS std_pop_0_nnull, + STDDEV_POP(CASE WHEN c_custkey > 2 THEN c_acctbal ELSE NULL END) AS std_pop_1_nnull, + STDDEV_POP(CASE WHEN c_custkey > 1 THEN c_acctbal ELSE NULL END) AS std_pop_2_nnull +FROM tpch.customer +WHERE + c_custkey IN (1, 2, 3) diff --git a/tests/test_sql_refsols/simplification_1_trino.sql b/tests/test_sql_refsols/simplification_1_trino.sql new file mode 100644 index 000000000..8384d095b --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_trino.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + CAST(COUNT(*) AS DOUBLE) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + CAST(COUNT(*) AS DOUBLE) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_2_trino.sql b/tests/test_sql_refsols/simplification_2_trino.sql new file mode 100644 index 000000000..963c3a426 --- /dev/null +++ b/tests/test_sql_refsols/simplification_2_trino.sql @@ -0,0 +1,50 @@ +SELECT + TRUE AS s00, 
+ FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_trino.sql b/tests/test_sql_refsols/simplification_3_trino.sql new file mode 100644 index 000000000..a21f990cf --- /dev/null +++ b/tests/test_sql_refsols/simplification_3_trino.sql @@ -0,0 +1,95 @@ +WITH _t2 AS ( + SELECT + ABS(CAST(sbcustpostalcode AS BIGINT)) AS expr_13, + ROW_NUMBER() OVER (ORDER BY sbcustname) AS rank, + AVG(CAST(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0)) AS DOUBLE)) OVER () AS ravg1, + COALESCE( + AVG(CAST(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0)) AS DOUBLE)) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW), + 0.1 + ) AS rsum2 + FROM main.sbcustomer +), _t1 AS ( + SELECT + rank, + ravg1, + ravg2, + rcnt1, + rcnt2, + rsiz1, + rsiz2, + rsum1, + rsum2, + CASE + WHEN FLOOR(0.75 * COUNT(expr_13) OVER ()) < ROW_NUMBER() OVER (ORDER BY expr_13 DESC) + THEN expr_13 + ELSE NULL + END AS expr_15, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - 1.0 + ) - ( + CAST(( + COUNT(expr_13) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN expr_13 + ELSE NULL + END AS expr_16 + FROM _t2 +) +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + MAX(expr_15) AS s25, + AVG(expr_16) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + ARBITRARY(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + ARBITRARY(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + ARBITRARY(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + ARBITRARY(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 diff --git a/tests/test_sql_refsols/simplification_4_trino.sql b/tests/test_sql_refsols/simplification_4_trino.sql new file mode 100644 index 000000000..632e528f2 --- /dev/null +++ b/tests/test_sql_refsols/simplification_4_trino.sql @@ -0,0 +1,136 @@ +WITH _t AS ( + SELECT + sbtxdatetime, + ROW_NUMBER() OVER (ORDER BY sbtxdatetime DESC NULLS FIRST) AS _w, + ROW_NUMBER() OVER (ORDER BY sbtxdatetime) AS _w_2 + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +) +SELECT + sbtxdatetime AS date_time, + DATE_ADD( + 'WEEK', + -8, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 0 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + 
) + ) AS s00, + FALSE AS s01, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (1, 2, 3) AS s02, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (4, 5, 6) AS s03, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (7, 8, 9) AS s04, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (10, 11, 12) AS s05, + FALSE AS s06, + FALSE AS s07, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) < 4 AS s08, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) < 7 AS s09, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) < 10 AS s10, + TRUE AS s11, + FALSE AS s12, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) <= 3 AS s13, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) <= 6 AS s14, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) <= 9 AS s15, + TRUE AS s16, + TRUE AS s17, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) > 3 AS s18, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) > 6 AS s19, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) > 9 AS s20, + FALSE AS s21, + TRUE AS s22, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) >= 4 AS s23, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) >= 7 AS s24, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) >= 10 AS s25, + FALSE AS s26, + TRUE AS s27, + NOT MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (1, 2, 3) AS s28, + NOT MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (4, 5, 6) AS s29, + NOT MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (7, 8, 9) AS s30, + NOT MONTH(CAST(sbtxdatetime AS TIMESTAMP)) IN (10, 11, 12) AS s31, + TRUE AS s32, + 2024 AS s33, + 3 AS s34, + 8 AS s35, + 13 AS s36, + 12 AS s37, + 45 AS s38, + 59 AS s39, + 2020 AS s40, + 1 AS s41, + 1 AS s42, + 31 AS s43, + 0 AS s44, + 0 AS s45, + 0 AS s46, + 2023 AS s47, + 3 AS s48, + 7 AS s49, + 4 AS s50, + 6 AS s51, + 55 AS s52, + 0 AS s53, + 1999 AS s54, + 4 AS s55, + 12 AS s56, + 31 AS s57, + 23 AS s58, + 59 AS s59, + 58 AS s60, + FALSE AS s61, + FALSE AS s62, + FALSE AS s63, + TRUE AS s64, + TRUE AS s65, + TRUE AS s66, + FALSE AS s67, + FALSE AS s68, + FALSE AS s69, + TRUE AS s70, + TRUE AS s71, + TRUE AS s72, + FALSE AS s73, + FALSE AS s74, + TRUE AS s75, + TRUE AS s76, + FALSE AS s77, + TRUE AS s78, 
+ FALSE AS s79, + TRUE AS s80, + TRUE AS s81, + FALSE AS s82, + TRUE AS s83, + FALSE AS s84, + FALSE AS s85, + TRUE AS s86, + FALSE AS s87, + FALSE AS s88, + TRUE AS s89, + TRUE AS s90, + FALSE AS s91, + TRUE AS s92, + FALSE AS s93, + FALSE AS s94, + TRUE AS s95, + TRUE AS s96, + CAST('2025-02-28' AS DATE) AS s97, + CAST('2024-12-30' AS DATE) AS s98, + CAST('2024-12-30' AS DATE) AS s99, + CAST('2024-12-30' AS DATE) AS s100, + CAST('2024-12-30' AS DATE) AS s101, + CAST('2024-12-30' AS DATE) AS s102, + CAST('2025-01-06' AS DATE) AS s103, + CAST('2025-01-06' AS DATE) AS s104 +FROM _t +WHERE + _w = 1 OR _w_2 = 1 diff --git a/tests/test_sql_refsols/singular1_trino.sql b/tests/test_sql_refsols/singular1_trino.sql new file mode 100644 index 000000000..09f5f4fd0 --- /dev/null +++ b/tests/test_sql_refsols/singular1_trino.sql @@ -0,0 +1,14 @@ +WITH _s1 AS ( + SELECT + n_name, + n_regionkey + FROM tpch.nation + WHERE + n_nationkey = 4 +) +SELECT + region.r_name AS name, + _s1.n_name AS nation_4_name +FROM tpch.region AS region +LEFT JOIN _s1 AS _s1 + ON _s1.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/singular2_trino.sql b/tests/test_sql_refsols/singular2_trino.sql new file mode 100644 index 000000000..067e9f76d --- /dev/null +++ b/tests/test_sql_refsols/singular2_trino.sql @@ -0,0 +1,16 @@ +WITH _s3 AS ( + SELECT + customer.c_nationkey, + orders.o_orderkey + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_orderkey = 454791 + WHERE + customer.c_custkey = 1 +) +SELECT + nation.n_name AS name, + _s3.o_orderkey AS okey +FROM tpch.nation AS nation +LEFT JOIN _s3 AS _s3 + ON _s3.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/singular3_trino.sql b/tests/test_sql_refsols/singular3_trino.sql new file mode 100644 index 000000000..75fd801a9 --- /dev/null +++ b/tests/test_sql_refsols/singular3_trino.sql @@ -0,0 +1,29 @@ +WITH _s0 AS ( + SELECT + c_custkey, + c_name + FROM 
tpch.customer + ORDER BY + 2 NULLS FIRST + LIMIT 5 +), _t AS ( + SELECT + o_custkey, + o_orderdate, + ROW_NUMBER() OVER (PARTITION BY o_custkey ORDER BY o_totalprice DESC NULLS FIRST) AS _w + FROM tpch.orders +), _s1 AS ( + SELECT + o_custkey, + o_orderdate + FROM _t + WHERE + _w = 1 +) +SELECT + _s0.c_name AS name +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.c_custkey = _s1.o_custkey +ORDER BY + _s1.o_orderdate diff --git a/tests/test_sql_refsols/singular4_trino.sql b/tests/test_sql_refsols/singular4_trino.sql new file mode 100644 index 000000000..3be95109f --- /dev/null +++ b/tests/test_sql_refsols/singular4_trino.sql @@ -0,0 +1,26 @@ +WITH _t AS ( + SELECT + o_custkey, + o_orderdate, + ROW_NUMBER() OVER (PARTITION BY o_custkey ORDER BY o_totalprice DESC NULLS FIRST) AS _w + FROM tpch.orders + WHERE + o_orderpriority = '1-URGENT' +), _s1 AS ( + SELECT + o_custkey, + o_orderdate + FROM _t + WHERE + _w = 1 +) +SELECT + customer.c_name AS name +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +WHERE + customer.c_nationkey = 6 +ORDER BY + COALESCE(_s1.o_orderdate, CAST('2000-01-01' AS DATE)) +LIMIT 5 diff --git a/tests/test_sql_refsols/singular5_trino.sql b/tests/test_sql_refsols/singular5_trino.sql new file mode 100644 index 000000000..88e1ba1dc --- /dev/null +++ b/tests/test_sql_refsols/singular5_trino.sql @@ -0,0 +1,49 @@ +WITH _t3 AS ( + SELECT + p_brand, + p_container, + p_partkey + FROM tpch.part + WHERE + p_brand = 'Brand#13' +), _t AS ( + SELECT + lineitem.l_shipdate, + _t7.p_partkey, + ROW_NUMBER() OVER (PARTITION BY _t7.p_container ORDER BY lineitem.l_extendedprice DESC NULLS FIRST, lineitem.l_shipdate) AS _w + FROM _t3 AS _t7 + JOIN tpch.lineitem AS lineitem + ON _t7.p_partkey = lineitem.l_partkey + AND lineitem.l_shipmode = 'RAIL' + AND lineitem.l_tax = 0 +), _s3 AS ( + SELECT + p_partkey, + ARBITRARY(l_shipdate) AS anything_l_shipdate, + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 1 + GROUP BY + 1 +), _t1 AS 
( + SELECT + _t3.p_container, + MAX(_s3.anything_l_shipdate) AS max_anything_l_shipdate, + SUM(_s3.n_rows) AS sum_n_rows + FROM _t3 AS _t3 + LEFT JOIN _s3 AS _s3 + ON _s3.p_partkey = _t3.p_partkey + GROUP BY + 1 +) +SELECT + p_container AS container, + max_anything_l_shipdate AS highest_price_ship_date +FROM _t1 +WHERE + sum_n_rows <> 0 +ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/singular6_trino.sql b/tests/test_sql_refsols/singular6_trino.sql new file mode 100644 index 000000000..56e24ff3c --- /dev/null +++ b/tests/test_sql_refsols/singular6_trino.sql @@ -0,0 +1,31 @@ +WITH _t AS ( + SELECT + lineitem.l_receiptdate, + lineitem.l_suppkey, + orders.o_custkey, + ROW_NUMBER() OVER (PARTITION BY orders.o_custkey ORDER BY lineitem.l_receiptdate, lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) DESC NULLS FIRST) AS _w + FROM tpch.orders AS orders + JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey + WHERE + orders.o_clerk = 'Clerk#000000017' +) +SELECT + customer.c_name AS name, + _t.l_receiptdate AS receipt_date, + nation.n_name AS nation_name +FROM tpch.customer AS customer +JOIN _t AS _t + ON _t._w = 1 AND _t.o_custkey = customer.c_custkey +JOIN tpch.supplier AS supplier + ON _t.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +WHERE + customer.c_nationkey = 4 +ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/singular7_trino.sql b/tests/test_sql_refsols/singular7_trino.sql new file mode 100644 index 000000000..399b90863 --- /dev/null +++ b/tests/test_sql_refsols/singular7_trino.sql @@ -0,0 +1,49 @@ +WITH _s3 AS ( + SELECT + l_partkey, + l_suppkey + FROM tpch.lineitem + WHERE + YEAR(CAST(l_shipdate AS TIMESTAMP)) = 1994 +), _t4 AS ( + SELECT + partsupp.ps_suppkey, + ARBITRARY(part.p_name) AS anything_p_name, + COUNT(_s3.l_suppkey) AS count_l_suppkey + FROM tpch.partsupp AS partsupp + JOIN 
tpch.part AS part + ON part.p_brand = 'Brand#13' AND part.p_partkey = partsupp.ps_partkey + LEFT JOIN _s3 AS _s3 + ON _s3.l_partkey = partsupp.ps_partkey AND _s3.l_suppkey = partsupp.ps_suppkey + GROUP BY + partsupp.ps_partkey, + 1 +), _t AS ( + SELECT + ps_suppkey, + anything_p_name, + count_l_suppkey, + ROW_NUMBER() OVER (PARTITION BY ps_suppkey ORDER BY COALESCE(count_l_suppkey, 0) DESC NULLS FIRST, anything_p_name) AS _w + FROM _t4 +), _s5 AS ( + SELECT + COALESCE(count_l_suppkey, 0) AS n_orders, + anything_p_name, + ps_suppkey + FROM _t + WHERE + _w = 1 +) +SELECT + supplier.s_name AS supplier_name, + _s5.anything_p_name AS part_name, + _s5.n_orders +FROM tpch.supplier AS supplier +LEFT JOIN _s5 AS _s5 + ON _s5.ps_suppkey = supplier.s_suppkey +WHERE + supplier.s_nationkey = 20 +ORDER BY + 3 DESC, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/smoke_a_trino.sql b/tests/test_sql_refsols/smoke_a_trino.sql new file mode 100644 index 000000000..802ada01c --- /dev/null +++ b/tests/test_sql_refsols/smoke_a_trino.sql @@ -0,0 +1,57 @@ +SELECT + p_partkey AS key, + CAST(CONCAT_WS( + '', + SUBSTRING( + p_brand, + CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END + ), + SUBSTRING(p_brand, 8), + SUBSTRING( + p_brand, + CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END, + CASE + WHEN ( + LENGTH(p_brand) + 0 + ) < 1 + THEN 0 + ELSE ( + LENGTH(p_brand) + 0 + ) - CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END + END + ) + ) AS BIGINT) AS a, + UPPER(LEAST(SPLIT_PART(p_name, ' ', 2), SPLIT_PART(p_name, ' ', -1))) AS b, + TRIM('o' FROM SUBSTRING(p_name, 1, 2)) AS c, + LPAD(CAST(p_size AS VARCHAR), 3, '0') AS d, + RPAD(CAST(p_size AS VARCHAR), 3, '0') AS e, + REPLACE(p_mfgr, 'Manufacturer#', 'm') AS f, + REPLACE(LOWER(p_container), ' ', '') AS g, + CASE + WHEN LENGTH('o') = 0 + THEN 0 + ELSE CAST(CAST(( + LENGTH(p_name) - LENGTH(REPLACE(p_name, 'o', 
'')) + ) AS DOUBLE) / LENGTH('o') AS BIGINT) + END + ( + CAST(( + STRPOS(p_name, 'o') - 1 + ) AS DOUBLE) / 100.0 + ) AS h, + ROUND(POWER(GREATEST(p_size, 10), 0.5), 3) AS i +FROM tpch.part +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/smoke_b_trino.sql b/tests/test_sql_refsols/smoke_b_trino.sql new file mode 100644 index 000000000..8446173d0 --- /dev/null +++ b/tests/test_sql_refsols/smoke_b_trino.sql @@ -0,0 +1,97 @@ +SELECT + o_orderkey AS key, + CONCAT_WS( + '_', + YEAR(CAST(o_orderdate AS TIMESTAMP)), + QUARTER(CAST(o_orderdate AS TIMESTAMP)), + MONTH(CAST(o_orderdate AS TIMESTAMP)), + DAY(CAST(o_orderdate AS TIMESTAMP)) + ) AS a, + CONCAT_WS( + ':', + CASE + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END, + ( + ( + DAY_OF_WEEK(o_orderdate) % 7 + ) + 1 + ) + ) AS b, + DATE_ADD('DAY', -13, DATE_ADD('MONTH', 6, DATE_TRUNC('YEAR', CAST(o_orderdate AS TIMESTAMP)))) AS c, + DATE_ADD( + 'HOUR', + 25, + DATE_ADD('YEAR', 1, DATE_TRUNC('QUARTER', CAST(o_orderdate AS TIMESTAMP))) + ) AS d, + CAST('2025-01-01 12:35:00' AS TIMESTAMP) AS e, + CAST('2025-07-22 12:00:00' AS TIMESTAMP) AS f, + CAST('2025-01-01' AS DATE) AS g, + CONCAT_WS(';', 12, 20, 6) AS h, + DATE_DIFF('YEAR', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS i, + DATE_DIFF('QUARTER', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS j, + DATE_DIFF('MONTH', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS k, + 
DATE_DIFF('WEEK', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS l, + DATE_DIFF('DAY', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS m, + DATE_DIFF('HOUR', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS n, + DATE_DIFF('MINUTE', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS o, + DATE_DIFF('SECOND', CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS TIMESTAMP)) AS p, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + CAST(( + ( + DAY_OF_WEEK(CAST(o_orderdate AS TIMESTAMP)) % 7 + ) + 1 + ) AS BIGINT) * -1, + CAST(o_orderdate AS TIMESTAMP) + ) + ) AS q +FROM tpch.orders +WHERE + STARTS_WITH(o_orderpriority, '3') AND o_clerk LIKE '%5' AND o_comment LIKE '%fo%' +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/smoke_c_trino.sql b/tests/test_sql_refsols/smoke_c_trino.sql new file mode 100644 index 000000000..67c65c223 --- /dev/null +++ b/tests/test_sql_refsols/smoke_c_trino.sql @@ -0,0 +1,45 @@ +WITH _t1 AS ( + SELECT + c_acctbal, + c_mktsegment, + c_name, + CASE + WHEN FLOOR(0.8 * COUNT(c_acctbal) OVER ()) < ROW_NUMBER() OVER (ORDER BY c_acctbal DESC) + THEN c_acctbal + ELSE NULL + END AS expr_30, + CASE + WHEN ABS( + ( + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC) - 1.0 + ) - ( + CAST(( + COUNT(c_acctbal) OVER () - 1.0 + ) AS DOUBLE) / 2.0 + ) + ) < 1.0 + THEN c_acctbal + ELSE NULL + END AS expr_31 + FROM tpch.customer +) +SELECT + COUNT(*) AS a, + COALESCE(SUM(FLOOR(c_acctbal)), 0) AS b, + COALESCE(SUM(CEIL(c_acctbal)), 0) AS c, + COUNT(DISTINCT c_mktsegment) AS d, + ROUND(AVG(ABS(c_acctbal)), 4) AS e, + MIN(c_acctbal) AS f, + MAX(c_acctbal) AS g, + ARBITRARY(SUBSTRING(c_name, 1, 1)) AS h, + COUNT(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END) AS i, + CEIL(VAR_POP(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END)) AS j, + ROUND(VARIANCE(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END), 4) AS k, + 
FLOOR(STDDEV_POP(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END)) AS l, + ROUND(STDDEV(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END), 4) AS m, + ROUND(AVG(COALESCE(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END, 0)), 2) AS n, + SUM(NOT CASE WHEN c_acctbal > 1000 THEN c_acctbal ELSE NULL END IS NULL) AS o, + SUM(CASE WHEN c_acctbal > 1000 THEN c_acctbal ELSE NULL END IS NULL) AS p, + MAX(expr_30) AS q, + AVG(expr_31) AS r +FROM _t1 diff --git a/tests/test_sql_refsols/smoke_d_trino.sql b/tests/test_sql_refsols/smoke_d_trino.sql new file mode 100644 index 000000000..b1092feb4 --- /dev/null +++ b/tests/test_sql_refsols/smoke_d_trino.sql @@ -0,0 +1,29 @@ +SELECT + customer.c_custkey AS key, + ROW_NUMBER() OVER (ORDER BY customer.c_acctbal, customer.c_custkey) AS a, + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal, customer.c_custkey) AS b, + RANK() OVER (ORDER BY customer.c_mktsegment) AS c, + DENSE_RANK() OVER (ORDER BY customer.c_mktsegment) AS d, + NTILE(100) OVER (ORDER BY customer.c_acctbal, customer.c_custkey) AS e, + NTILE(12) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal, customer.c_custkey) AS f, + LAG(customer.c_custkey, 1) OVER (ORDER BY customer.c_custkey) AS g, + LAG(customer.c_custkey, 2, 42) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey) AS h, + LEAD(customer.c_custkey, 1) OVER (ORDER BY customer.c_custkey) AS i, + LEAD(customer.c_custkey, 6000) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey) AS j, + SUM(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) AS k, + SUM(customer.c_acctbal) OVER (ORDER BY customer.c_custkey ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS l, + ROUND(AVG(CAST(customer.c_acctbal AS DOUBLE)) OVER (), 2) AS m, + ROUND( + AVG(CAST(customer.c_acctbal AS DOUBLE)) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 2 + ) AS n, + 
COUNT(CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END) OVER () AS o, + COUNT(*) OVER () AS p +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +ORDER BY + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_trino.sql new file mode 100644 index 000000000..669b4ba40 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 3 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 3 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_trino.sql new file mode 100644 index 000000000..d6dbd7024 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_trino.sql @@ -0,0 +1,69 @@ 
+SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 3 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 3 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_trino.sql new file mode 100644 index 000000000..1f77b0bab --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 0 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 
'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 0 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_trino.sql new file mode 100644 index 000000000..b6f117839 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_trino.sql @@ -0,0 +1,69 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 0 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 0 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_trino.sql new file mode 100644 index 000000000..37301872d --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 
'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 2 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 2 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_trino.sql new file mode 100644 index 000000000..1498150ea --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_trino.sql @@ -0,0 +1,69 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 2 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + 
DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 2 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_trino.sql new file mode 100644 index 000000000..5e641fbc7 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_trino.sql @@ -0,0 +1,65 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + CAST(( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 1 + ) AS BIGINT) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 2 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_trino.sql new file mode 100644 index 000000000..bdbabaede --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_trino.sql @@ -0,0 +1,65 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + CAST(( + ( + 
DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 1 + ) AS BIGINT) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_trino.sql new file mode 100644 index 000000000..81fbdecc7 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 4 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, 
+ ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 4 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_trino.sql new file mode 100644 index 000000000..7e0644f21 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_trino.sql @@ -0,0 +1,69 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 4 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 4 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_trino.sql new file mode 100644 index 000000000..085179bcb --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 6 + ) 
% 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 6 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_trino.sql new file mode 100644 index 000000000..c34334750 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_trino.sql @@ -0,0 +1,69 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 6 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 
+ ) + 6 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_trino.sql new file mode 100644 index 000000000..2bedc7d65 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_trino.sql @@ -0,0 +1,71 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 5 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 5 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_trino.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_trino.sql new file mode 100644 index 000000000..9fb01593f --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_trino.sql @@ -0,0 +1,69 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATE_ADD( + 'DAY', + ( + ( + ( + DAY_OF_WEEK(CAST(sbtxdatetime AS TIMESTAMP)) % 7 + ) + 5 + ) % 7 + ) * -1, + 
CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 1 + THEN 'Monday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 2 + THEN 'Tuesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 3 + THEN 'Wednesday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 4 + THEN 'Thursday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 5 + THEN 'Friday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 6 + THEN 'Saturday' + WHEN ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 1 + ) = 7 + THEN 'Sunday' + END AS dayname, + ( + ( + DAY_OF_WEEK(sbtxdatetime) % 7 + ) + 5 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/string_format_specifiers_trino.sql b/tests/test_sql_refsols/string_format_specifiers_trino.sql new file mode 100644 index 000000000..690637796 --- /dev/null +++ b/tests/test_sql_refsols/string_format_specifiers_trino.sql @@ -0,0 +1,26 @@ +SELECT + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%d') AS d1, + DATE_FORMAT('2023-07-15 14:30:45', '%e') AS d2, + DATE_FORMAT('2023-07-15 14:30:45', '%f') AS d3, + DATE_FORMAT('2023-07-15 14:30:45', '%F') AS d4, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%H') AS d5, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%h') AS d6, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%j') AS d7, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%J') AS d8, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%k') AS d9, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%l') AS d10, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%m') AS d11, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%i') AS d12, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%p') AS d13, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), 
'%P') AS d14, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%R') AS d15, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%s') AS d16, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%s') AS d17, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%T') AS d18, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%u') AS d19, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%w') AS d20, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%u') AS d21, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%Y') AS d22, + DATE_FORMAT(CAST('2023-07-15 14:30:45' AS TIMESTAMP), '%m-%d-%Y') AS d23 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/string_functions_trino.sql b/tests/test_sql_refsols/string_functions_trino.sql new file mode 100644 index 000000000..2ef06e426 --- /dev/null +++ b/tests/test_sql_refsols/string_functions_trino.sql @@ -0,0 +1,27 @@ +SELECT + LOWER(customer.c_name) AS lowercase_name, + UPPER(customer.c_name) AS uppercase_name, + LENGTH(customer.c_name) AS name_length, + STARTS_WITH(customer.c_name, 'A') AS starts_with_A, + customer.c_name LIKE '%z' AS ends_with_z, + customer.c_name LIKE '%sub%' AS contains_sub, + customer.c_name LIKE '%test%' AS matches_like, + CONCAT_WS('::', customer.c_name, nation.n_name) AS joined_string, + LPAD(customer.c_name, 20, '*') AS lpad_name, + RPAD(customer.c_name, 20, '-') AS rpad_name, + TRIM(' + ' FROM customer.c_name) AS stripped, + TRIM('aeiou' FROM customer.c_name) AS stripped_vowels, + REPLACE(customer.c_name, 'Corp', 'Inc') AS replaced_name, + REPLACE(customer.c_name, 'Ltd', '') AS removed_substr, + CASE + WHEN LENGTH('e') = 0 + THEN 0 + ELSE CAST(CAST(( + LENGTH(customer.c_name) - LENGTH(REPLACE(customer.c_name, 'e', '')) + ) AS DOUBLE) / LENGTH('e') AS BIGINT) + END AS count_e, + STRPOS(customer.c_name, 'Alex') - 1 AS idx_Alex +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey 
diff --git a/tests/test_sql_refsols/supplier_best_part_trino.sql b/tests/test_sql_refsols/supplier_best_part_trino.sql new file mode 100644 index 000000000..495539188 --- /dev/null +++ b/tests/test_sql_refsols/supplier_best_part_trino.sql @@ -0,0 +1,37 @@ +WITH _s2 AS ( + SELECT + l_partkey, + l_suppkey, + COUNT(*) AS n_rows, + SUM(l_quantity) AS sum_l_quantity + FROM tpch.lineitem + WHERE + YEAR(CAST(l_shipdate AS TIMESTAMP)) = 1994 AND l_tax = 0 + GROUP BY + 1, + 2 +), _t AS ( + SELECT + _s2.l_suppkey, + _s2.n_rows, + part.p_name, + _s2.sum_l_quantity, + ROW_NUMBER() OVER (PARTITION BY _s2.l_suppkey ORDER BY COALESCE(_s2.sum_l_quantity, 0) DESC NULLS FIRST) AS _w + FROM _s2 AS _s2 + JOIN tpch.part AS part + ON _s2.l_partkey = part.p_partkey +) +SELECT + supplier.s_name AS supplier_name, + _t.p_name AS part_name, + COALESCE(_t.sum_l_quantity, 0) AS total_quantity, + _t.n_rows AS n_shipments +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'FRANCE' AND nation.n_nationkey = supplier.s_nationkey +JOIN _t AS _t + ON _t._w = 1 AND _t.l_suppkey = supplier.s_suppkey +ORDER BY + 3 DESC, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/supplier_pct_national_qty_trino.sql b/tests/test_sql_refsols/supplier_pct_national_qty_trino.sql new file mode 100644 index 000000000..be172adc7 --- /dev/null +++ b/tests/test_sql_refsols/supplier_pct_national_qty_trino.sql @@ -0,0 +1,46 @@ +WITH _s7 AS ( + SELECT + lineitem.l_quantity, + lineitem.l_suppkey + FROM tpch.lineitem AS lineitem + JOIN tpch.part AS part + ON STARTS_WITH(part.p_container, 'LG') + AND lineitem.l_partkey = part.p_partkey + AND part.p_name LIKE '%tomato%' + WHERE + MONTH(CAST(lineitem.l_shipdate AS TIMESTAMP)) < 7 + AND YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1995 + AND lineitem.l_shipmode = 'SHIP' +), _t0 AS ( + SELECT + ARBITRARY(nation.n_name) AS anything_n_name, + ARBITRARY(supplier.s_name) AS anything_s_name, + ARBITRARY(supplier.s_nationkey) AS 
anything_s_nationkey, + SUM(_s7.l_quantity) AS sum_l_quantity + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' + JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey + AND supplier.s_acctbal >= 8000.0 + AND supplier.s_comment LIKE '%careful%' + LEFT JOIN _s7 AS _s7 + ON _s7.l_suppkey = supplier.s_suppkey + GROUP BY + supplier.s_suppkey +) +SELECT + anything_s_name AS supplier_name, + anything_n_name AS nation_name, + COALESCE(sum_l_quantity, 0) AS supplier_quantity, + ( + 100.0 * COALESCE(sum_l_quantity, 0) + ) / CASE + WHEN SUM(COALESCE(sum_l_quantity, 0)) OVER (PARTITION BY anything_s_nationkey) > 0 + THEN SUM(COALESCE(sum_l_quantity, 0)) OVER (PARTITION BY anything_s_nationkey) + ELSE NULL + END AS national_qty_pct +FROM _t0 +ORDER BY + 4 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/suppliers_bal_diffs_trino.sql b/tests/test_sql_refsols/suppliers_bal_diffs_trino.sql new file mode 100644 index 000000000..09958daca --- /dev/null +++ b/tests/test_sql_refsols/suppliers_bal_diffs_trino.sql @@ -0,0 +1,12 @@ +SELECT + supplier.s_name AS name, + region.r_name AS region_name, + supplier.s_acctbal - LAG(supplier.s_acctbal, 1) OVER (PARTITION BY nation.n_regionkey ORDER BY supplier.s_acctbal) AS acctbal_delta +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_trino.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_trino.sql new file mode 100644 index 000000000..27c572c49 --- /dev/null +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_trino.sql @@ -0,0 +1,29 @@ +WITH _s7 AS ( + SELECT + incidents.in_device_id, + COUNT(*) AS n_rows + FROM main.incidents AS incidents + JOIN main.errors 
AS errors + ON errors.er_id = incidents.in_error_id AND errors.er_name = 'Battery Failure' + GROUP BY + 1 +) +SELECT + countries.co_name AS country_name, + products.pr_name AS product_name, + ROUND(CAST(COALESCE(SUM(_s7.n_rows), 0) AS DOUBLE) / COUNT(*), 2) AS ir +FROM main.countries AS countries +JOIN main.devices AS devices + ON countries.co_id = devices.de_production_country_id +JOIN main.products AS products + ON devices.de_product_id = products.pr_id +LEFT JOIN _s7 AS _s7 + ON _s7.in_device_id = devices.de_id +GROUP BY + 1, + 2 +ORDER BY + 3 DESC, + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_cartesian_oddball_trino.sql b/tests/test_sql_refsols/technograph_country_cartesian_oddball_trino.sql new file mode 100644 index 000000000..ad81fe364 --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_cartesian_oddball_trino.sql @@ -0,0 +1,12 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.countries +) +SELECT + countries.co_name AS name, + _s1.n_rows AS n_other_countries +FROM main.countries AS countries +CROSS JOIN _s1 AS _s1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_trino.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_trino.sql new file mode 100644 index 000000000..dc3aee9b4 --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_trino.sql @@ -0,0 +1,45 @@ +WITH _s0 AS ( + SELECT + co_id, + co_name + FROM main.countries +), _s2 AS ( + SELECT + co_id + FROM main.countries +), _t1 AS ( + SELECT + ARBITRARY(_s3.co_id) AS anything__id_3, + ARBITRARY(_s2.co_id) AS anything_co_id, + COUNT(incidents.in_device_id) AS count_in_device_id + FROM _s2 AS _s2 + CROSS JOIN _s2 AS _s3 + JOIN main.devices AS devices + ON _s2.co_id = devices.de_production_country_id + AND _s3.co_id = devices.de_purchase_country_id + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id + 
GROUP BY + devices.de_id +), _s9 AS ( + SELECT + anything__id_3, + anything_co_id, + COUNT(*) AS n_rows, + SUM(count_in_device_id) AS sum_n_rows + FROM _t1 + GROUP BY + 1, + 2 +) +SELECT + _s0.co_name AS factory_country, + _s1.co_name AS purchase_country, + ROUND(CAST(COALESCE(_s9.sum_n_rows, 0) AS DOUBLE) / COALESCE(_s9.n_rows, 0), 2) AS ir +FROM _s0 AS _s0 +CROSS JOIN _s0 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.co_id = _s9.anything_co_id AND _s1.co_id = _s9.anything__id_3 +ORDER BY + 3 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_trino.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_trino.sql new file mode 100644 index 000000000..7f7d779fd --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_trino.sql @@ -0,0 +1,72 @@ +WITH _t2 AS ( + SELECT + in_device_id + FROM main.incidents +), _s1 AS ( + SELECT + in_device_id, + COUNT(*) AS n_rows + FROM _t2 + GROUP BY + 1 +), _s3 AS ( + SELECT + devices.de_production_country_id, + COUNT(*) AS n_rows, + SUM(_s1.n_rows) AS sum_n_rows + FROM main.devices AS devices + LEFT JOIN _s1 AS _s1 + ON _s1.in_device_id = devices.de_id + GROUP BY + 1 +), _s5 AS ( + SELECT + in_device_id, + COUNT(*) AS n_rows + FROM _t2 + GROUP BY + 1 +), _s7 AS ( + SELECT + devices.de_purchase_country_id, + COUNT(*) AS n_rows, + SUM(_s5.n_rows) AS sum_n_rows + FROM main.devices AS devices + LEFT JOIN _s5 AS _s5 + ON _s5.in_device_id = devices.de_id + GROUP BY + 1 +), _t5 AS ( + SELECT + ARBITRARY(users.us_country_id) AS anything_us_country_id, + COUNT(_s11.in_device_id) AS count_in_device_id + FROM main.users AS users + JOIN main.devices AS devices + ON devices.de_owner_id = users.us_id + LEFT JOIN _t2 AS _s11 + ON _s11.in_device_id = devices.de_id + GROUP BY + devices.de_id +), _s13 AS ( + SELECT + anything_us_country_id, + COUNT(*) AS n_rows, + SUM(count_in_device_id) AS sum_n_rows + FROM _t5 + GROUP BY + 1 +) +SELECT + countries.co_name AS 
country_name, + ROUND(CAST(COALESCE(_s3.sum_n_rows, 0) AS DOUBLE) / _s3.n_rows, 2) AS made_ir, + ROUND(CAST(COALESCE(_s7.sum_n_rows, 0) AS DOUBLE) / _s7.n_rows, 2) AS sold_ir, + ROUND(CAST(COALESCE(_s13.sum_n_rows, 0) AS DOUBLE) / COALESCE(_s13.n_rows, 0), 2) AS user_ir +FROM main.countries AS countries +JOIN _s3 AS _s3 + ON _s3.de_production_country_id = countries.co_id +JOIN _s7 AS _s7 + ON _s7.de_purchase_country_id = countries.co_id +LEFT JOIN _s13 AS _s13 + ON _s13.anything_us_country_id = countries.co_id +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_trino.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_trino.sql new file mode 100644 index 000000000..c400ac716 --- /dev/null +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_trino.sql @@ -0,0 +1,22 @@ +WITH _s5 AS ( + SELECT + incidents.in_error_id, + COUNT(*) AS n_rows + FROM main.incidents AS incidents + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN main.products AS products + ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' + GROUP BY + 1 +) +SELECT + errors.er_name AS error, + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id +ORDER BY + 2 DESC diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_trino.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_trino.sql new file mode 100644 index 000000000..7c1de6b5a --- /dev/null +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_trino.sql @@ -0,0 +1,28 @@ +WITH _t2 AS ( + SELECT + ARBITRARY(devices.de_production_country_id) AS anything_de_production_country_id, + COUNT(incidents.in_device_id) AS count_in_device_id + FROM main.devices AS devices + JOIN main.products AS 
products + ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' + LEFT JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id + GROUP BY + devices.de_id +), _s5 AS ( + SELECT + COALESCE(SUM(count_in_device_id), 0) AS sum_n_incidents, + anything_de_production_country_id, + COUNT(*) AS n_rows + FROM _t2 + GROUP BY + 2 +) +SELECT + countries.co_name AS country, + ROUND(CAST(COALESCE(_s5.sum_n_incidents, 0) AS DOUBLE) / COALESCE(_s5.n_rows, 0), 2) AS ir +FROM main.countries AS countries +LEFT JOIN _s5 AS _s5 + ON _s5.anything_de_production_country_id = countries.co_id +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_global_incident_rate_trino.sql b/tests/test_sql_refsols/technograph_global_incident_rate_trino.sql new file mode 100644 index 000000000..71d7e722d --- /dev/null +++ b/tests/test_sql_refsols/technograph_global_incident_rate_trino.sql @@ -0,0 +1,13 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.incidents +), _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.devices +) +SELECT + ROUND(CAST(_s0.n_rows AS DOUBLE) / _s1.n_rows, 2) AS ir +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/technograph_hot_purchase_window_trino.sql b/tests/test_sql_refsols/technograph_hot_purchase_window_trino.sql new file mode 100644 index 000000000..105771c33 --- /dev/null +++ b/tests/test_sql_refsols/technograph_hot_purchase_window_trino.sql @@ -0,0 +1,17 @@ +SELECT + calendar.ca_dt AS start_of_period, + COUNT(*) AS n_purchases +FROM main.calendar AS calendar +JOIN main.calendar AS calendar_2 + ON calendar.ca_dt <= calendar_2.ca_dt + AND calendar_2.ca_dt < DATE_ADD('DAY', 5, CAST(calendar.ca_dt AS TIMESTAMP)) +JOIN main.devices AS devices + ON calendar_2.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) +WHERE + YEAR(CAST(calendar.ca_dt AS TIMESTAMP)) = 2024 +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 1 diff --git 
a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_trino.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_trino.sql new file mode 100644 index 000000000..5193cd4cd --- /dev/null +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_trino.sql @@ -0,0 +1,41 @@ +WITH _s0 AS ( + SELECT + de_product_id, + COUNT(*) AS n_rows + FROM main.devices + GROUP BY + 1 +), _s1 AS ( + SELECT + pr_id, + pr_release + FROM main.products +), _s6 AS ( + SELECT + YEAR(CAST(_s1.pr_release AS TIMESTAMP)) AS year_pr_release, + SUM(_s0.n_rows) AS sum_n_rows + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id + GROUP BY + 1 +), _s7 AS ( + SELECT + YEAR(CAST(_s3.pr_release AS TIMESTAMP)) AS year_pr_release, + COUNT(*) AS n_rows + FROM main.devices AS devices + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id + JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id + GROUP BY + 1 +) +SELECT + _s6.year_pr_release AS year, + ROUND(CAST(COALESCE(_s7.n_rows, 0) AS DOUBLE) / _s6.sum_n_rows, 2) AS ir +FROM _s6 AS _s6 +LEFT JOIN _s7 AS _s7 + ON _s6.year_pr_release = _s7.year_pr_release +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_trino.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_trino.sql new file mode 100644 index 000000000..bd1d6e660 --- /dev/null +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_trino.sql @@ -0,0 +1,20 @@ +WITH _s3 AS ( + SELECT + in_device_id, + COUNT(*) AS n_rows + FROM main.incidents + GROUP BY + 1 +) +SELECT + products.pr_brand AS brand, + ROUND(CAST(COALESCE(SUM(_s3.n_rows), 0) AS DOUBLE) / COUNT(*), 2) AS ir +FROM main.devices AS devices +JOIN main.products AS products + ON devices.de_product_id = products.pr_id +LEFT JOIN _s3 AS _s3 + ON _s3.in_device_id = devices.de_id +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_trino.sql 
b/tests/test_sql_refsols/technograph_monthly_incident_rate_trino.sql new file mode 100644 index 000000000..c20cf45e1 --- /dev/null +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_trino.sql @@ -0,0 +1,62 @@ +WITH _t2 AS ( + SELECT + ca_dt + FROM main.calendar + WHERE + YEAR(CAST(ca_dt AS TIMESTAMP)) IN (2020, 2021) +), _t5 AS ( + SELECT + co_id, + co_name + FROM main.countries + WHERE + co_name = 'CN' +), _s7 AS ( + SELECT + _t4.ca_dt, + COUNT(*) AS n_rows + FROM _t2 AS _t4 + JOIN main.calendar AS calendar + ON calendar.ca_dt >= DATE_ADD('MONTH', -6, CAST(_t4.ca_dt AS TIMESTAMP)) + JOIN main.devices AS devices + ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id + GROUP BY + 1 +), _s15 AS ( + SELECT + _t7.ca_dt, + COUNT(*) AS n_rows + FROM _t2 AS _t7 + JOIN main.incidents AS incidents + ON _t7.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id + GROUP BY + 1 +), _t0 AS ( + SELECT + MONTH(CAST(_t2.ca_dt AS TIMESTAMP)) AS month_ca_dt, + YEAR(CAST(_t2.ca_dt AS TIMESTAMP)) AS year_ca_dt, + SUM(_s7.n_rows) AS sum_expr_3, + SUM(_s15.n_rows) AS sum_n_rows + FROM _t2 AS _t2 + LEFT JOIN _s7 AS _s7 + ON _s7.ca_dt = _t2.ca_dt + LEFT JOIN _s15 AS _s15 + ON _s15.ca_dt = _t2.ca_dt + GROUP BY + 1, + 2 +) +SELECT + CONCAT_WS('-', year_ca_dt, LPAD(month_ca_dt, 2, '0')) AS month, + ROUND(( + 1000000.0 * COALESCE(sum_n_rows, 0) + ) / COALESCE(sum_expr_3, 0), 2) AS ir +FROM _t0 +ORDER BY + month_ca_dt NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_trino.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_trino.sql new file mode 100644 index 000000000..ace57b7f6 --- /dev/null +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_trino.sql @@ -0,0 
+1,29 @@ +WITH _s1 AS ( + SELECT + in_device_id, + COUNT(*) AS n_rows + FROM main.incidents + GROUP BY + 1 +), _s3 AS ( + SELECT + devices.de_product_id, + COUNT(*) AS n_rows, + SUM(_s1.n_rows) AS sum_n_rows + FROM main.devices AS devices + LEFT JOIN _s1 AS _s1 + ON _s1.in_device_id = devices.de_id + GROUP BY + 1 +) +SELECT + products.pr_name AS product, + products.pr_brand AS product_brand, + products.pr_type AS product_type, + ROUND(CAST(COALESCE(_s3.sum_n_rows, 0) AS DOUBLE) / _s3.n_rows, 2) AS ir +FROM main.products AS products +JOIN _s3 AS _s3 + ON _s3.de_product_id = products.pr_id +ORDER BY + 4 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_trino.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_trino.sql new file mode 100644 index 000000000..a8e8fef4d --- /dev/null +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_trino.sql @@ -0,0 +1,83 @@ +WITH _s14 AS ( + SELECT + ARBITRARY(pr_release) AS anything_pr_release + FROM main.products + WHERE + pr_name = 'GoldCopper-Star' +), _s6 AS ( + SELECT + ca_dt + FROM main.calendar +), _t5 AS ( + SELECT + pr_id, + pr_name + FROM main.products + WHERE + pr_name = 'GoldCopper-Star' +), _s7 AS ( + SELECT + _s0.ca_dt, + COUNT(*) AS n_rows + FROM _s6 AS _s0 + JOIN main.incidents AS incidents + ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN _t5 AS _t5 + ON _t5.pr_id = devices.de_product_id + GROUP BY + 1 +), _s13 AS ( + SELECT + _s8.ca_dt, + COUNT(*) AS n_rows + FROM _s6 AS _s8 + JOIN main.devices AS devices + ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN _t5 AS _t7 + ON _t7.pr_id = devices.de_product_id + GROUP BY + 1 +), _s15 AS ( + SELECT + YEAR(CAST(_s6.ca_dt AS TIMESTAMP)) AS year_ca_dt, + SUM(_s7.n_rows) AS sum_expr_4, + SUM(_s13.n_rows) 
AS sum_n_rows + FROM _s6 AS _s6 + LEFT JOIN _s7 AS _s7 + ON _s6.ca_dt = _s7.ca_dt + LEFT JOIN _s13 AS _s13 + ON _s13.ca_dt = _s6.ca_dt + GROUP BY + 1 +) +SELECT + _s15.year_ca_dt - YEAR(CAST(_s14.anything_pr_release AS TIMESTAMP)) AS years_since_release, + ROUND( + CAST(SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year_ca_dt ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year_ca_dt ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year_ca_dt) + ) + ) AS DOUBLE) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year_ca_dt), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year_ca_dt) + ) + ) AS DOUBLE) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year_ca_dt), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year_ca_dt >= YEAR(CAST(_s14.anything_pr_release AS TIMESTAMP)) +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_trino.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_trino.sql new file mode 100644 index 000000000..af69e7242 --- /dev/null +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_trino.sql @@ -0,0 +1,64 @@ +WITH _s2 AS ( + SELECT + ca_dt + FROM main.calendar +), _s3 AS ( + SELECT + _s0.ca_dt, + COUNT(*) AS n_rows + FROM _s2 AS _s0 + JOIN main.devices AS devices + ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + GROUP BY + 1 +), _s7 AS ( + SELECT + _s4.ca_dt, + COUNT(*) AS n_rows + FROM _s2 AS _s4 + JOIN main.incidents AS incidents + ON _s4.ca_dt = 
DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + GROUP BY + 1 +), _t1 AS ( + SELECT + YEAR(CAST(_s2.ca_dt AS TIMESTAMP)) AS year_ca_dt, + SUM(_s3.n_rows) AS sum_expr_3, + SUM(_s7.n_rows) AS sum_n_rows + FROM _s2 AS _s2 + LEFT JOIN _s3 AS _s3 + ON _s2.ca_dt = _s3.ca_dt + LEFT JOIN _s7 AS _s7 + ON _s2.ca_dt = _s7.ca_dt + GROUP BY + 1 +) +SELECT + year_ca_dt AS yr, + ROUND( + CAST(SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year_ca_dt ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS DOUBLE) / SUM(sum_expr_3) OVER (ORDER BY year_ca_dt ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + CAST(( + 100.0 * ( + sum_expr_3 - LAG(sum_expr_3, 1) OVER (ORDER BY year_ca_dt) + ) + ) AS DOUBLE) / LAG(sum_expr_3, 1) OVER (ORDER BY year_ca_dt), + 2 + ) AS pct_bought_change, + ROUND( + CAST(( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year_ca_dt) + ) + ) AS DOUBLE) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year_ca_dt), + 2 + ) AS pct_incident_change, + sum_expr_3 AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t1 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 <> 0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/time_threshold_reached_trino.sql b/tests/test_sql_refsols/time_threshold_reached_trino.sql new file mode 100644 index 000000000..a3eba676e --- /dev/null +++ b/tests/test_sql_refsols/time_threshold_reached_trino.sql @@ -0,0 +1,24 @@ +WITH _t3 AS ( + SELECT + sbtxdatetime, + CAST(( + 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) AS DOUBLE) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _t AS ( + SELECT + sbtxdatetime, + ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS 
TIMESTAMP)) ORDER BY pct_of_day) AS _w + FROM _t3 + WHERE + pct_of_day >= 50.0 +) +SELECT + sbtxdatetime AS date_time +FROM _t +WHERE + _w = 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/top_customers_by_orders_trino.sql b/tests/test_sql_refsols/top_customers_by_orders_trino.sql new file mode 100644 index 000000000..8b77e2eb2 --- /dev/null +++ b/tests/test_sql_refsols/top_customers_by_orders_trino.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +) +SELECT + customer.c_custkey AS customer_key, + COALESCE(_s1.n_rows, 0) AS n_orders +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/top_lineitems_info_1_trino.sql b/tests/test_sql_refsols/top_lineitems_info_1_trino.sql new file mode 100644 index 000000000..be7bee5d1 --- /dev/null +++ b/tests/test_sql_refsols/top_lineitems_info_1_trino.sql @@ -0,0 +1,34 @@ +WITH _s2 AS ( + SELECT + l_linenumber, + l_orderkey, + l_partkey, + l_suppkey + FROM tpch.lineitem + ORDER BY + 2 NULLS FIRST, + 1 NULLS FIRST + LIMIT 7 +), _s0 AS ( + SELECT + ps_partkey, + ps_suppkey + FROM tpch.partsupp +) +SELECT + _s2.l_orderkey AS order_key, + _s2.l_linenumber AS line_number, + part.p_size AS part_size, + supplier.s_nationkey AS supplier_nation +FROM _s2 AS _s2 +JOIN _s0 AS _s0 + ON _s0.ps_partkey = _s2.l_partkey AND _s0.ps_suppkey = _s2.l_suppkey +JOIN tpch.part AS part + ON _s0.ps_partkey = part.p_partkey +JOIN _s0 AS _s4 + ON _s2.l_partkey = _s4.ps_partkey AND _s2.l_suppkey = _s4.ps_suppkey +JOIN tpch.supplier AS supplier + ON _s4.ps_suppkey = supplier.s_suppkey +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/top_lineitems_info_2_trino.sql b/tests/test_sql_refsols/top_lineitems_info_2_trino.sql new file mode 100644 index 000000000..bfa83b7ee --- /dev/null +++ 
b/tests/test_sql_refsols/top_lineitems_info_2_trino.sql @@ -0,0 +1,28 @@ +WITH _s1 AS ( + SELECT + ps_partkey, + ps_suppkey + FROM tpch.partsupp +) +SELECT + lineitem.l_orderkey AS order_key, + lineitem.l_linenumber AS line_number, + part.p_size AS part_size, + nation.n_nationkey AS supplier_nation +FROM tpch.part AS part +JOIN _s1 AS _s1 + ON _s1.ps_partkey = part.p_partkey +CROSS JOIN tpch.nation AS nation +JOIN tpch.supplier AS supplier + ON nation.n_nationkey = supplier.s_nationkey +JOIN _s1 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey +JOIN tpch.lineitem AS lineitem + ON _s1.ps_suppkey = lineitem.l_suppkey + AND _s7.ps_partkey = lineitem.l_partkey + AND _s7.ps_suppkey = lineitem.l_suppkey + AND lineitem.l_partkey = part.p_partkey +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST +LIMIT 7 diff --git a/tests/test_sql_refsols/tpch_q10_trino.sql b/tests/test_sql_refsols/tpch_q10_trino.sql new file mode 100644 index 000000000..bf4bfb2d1 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q10_trino.sql @@ -0,0 +1,33 @@ +WITH _s3 AS ( + SELECT + orders.o_custkey, + SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )) AS sum_expr + FROM tpch.orders AS orders + JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey AND lineitem.l_returnflag = 'R' + WHERE + MONTH(CAST(orders.o_orderdate AS TIMESTAMP)) IN (10, 11, 12) + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1993 + GROUP BY + 1 +) +SELECT + customer.c_custkey AS C_CUSTKEY, + customer.c_name AS C_NAME, + COALESCE(_s3.sum_expr, 0) AS REVENUE, + customer.c_acctbal AS C_ACCTBAL, + nation.n_name AS N_NAME, + customer.c_address AS C_ADDRESS, + customer.c_phone AS C_PHONE, + customer.c_comment AS C_COMMENT +FROM tpch.customer AS customer +LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +ORDER BY + 3 DESC, + 1 NULLS FIRST +LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q11_trino.sql 
b/tests/test_sql_refsols/tpch_q11_trino.sql new file mode 100644 index 000000000..c4a081cb0 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q11_trino.sql @@ -0,0 +1,43 @@ +WITH _s0 AS ( + SELECT + s_nationkey, + s_suppkey + FROM tpch.supplier +), _t2 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _s8 AS ( + SELECT + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + FROM tpch.partsupp AS partsupp + JOIN _s0 AS _s0 + ON _s0.s_suppkey = partsupp.ps_suppkey + JOIN _t2 AS _t2 + ON _s0.s_nationkey = _t2.n_nationkey +), _s9 AS ( + SELECT + partsupp.ps_partkey, + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr + FROM tpch.partsupp AS partsupp + JOIN _s0 AS _s4 + ON _s4.s_suppkey = partsupp.ps_suppkey + JOIN _t2 AS _t4 + ON _s4.s_nationkey = _t4.n_nationkey + GROUP BY + 1 +) +SELECT + _s9.ps_partkey AS PS_PARTKEY, + COALESCE(_s9.sum_expr, 0) AS VALUE +FROM _s8 AS _s8 +JOIN _s9 AS _s9 + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr, 0) +ORDER BY + 2 DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q12_trino.sql b/tests/test_sql_refsols/tpch_q12_trino.sql new file mode 100644 index 000000000..032b4c5a6 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q12_trino.sql @@ -0,0 +1,18 @@ +SELECT + lineitem.l_shipmode AS L_SHIPMODE, + COALESCE(SUM(orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS HIGH_LINE_COUNT, + COALESCE(SUM(NOT orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS LOW_LINE_COUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +WHERE + YEAR(CAST(lineitem.l_receiptdate AS TIMESTAMP)) = 1994 + AND lineitem.l_commitdate < lineitem.l_receiptdate + AND lineitem.l_commitdate > lineitem.l_shipdate + AND ( + lineitem.l_shipmode = 'MAIL' OR lineitem.l_shipmode = 'SHIP' + ) +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q13_trino.sql 
b/tests/test_sql_refsols/tpch_q13_trino.sql new file mode 100644 index 000000000..c8fce7bb7 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q13_trino.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows + FROM tpch.orders + WHERE + NOT o_comment LIKE '%special%requests%' + GROUP BY + 1 +) +SELECT + COALESCE(_s1.n_rows, 0) AS C_COUNT, + COUNT(*) AS CUSTDIST +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +GROUP BY + 1 +ORDER BY + 2 DESC, + 1 DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q14_trino.sql b/tests/test_sql_refsols/tpch_q14_trino.sql new file mode 100644 index 000000000..f4d736b74 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q14_trino.sql @@ -0,0 +1,23 @@ +SELECT + ( + 100.0 * COALESCE( + SUM( + IF( + STARTS_WITH(part.p_type, 'PROMO'), + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ), + 0 + ) + ), + 0 + ) + ) / COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS PROMO_REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey +WHERE + MONTH(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 9 + AND YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1995 diff --git a/tests/test_sql_refsols/tpch_q15_trino.sql b/tests/test_sql_refsols/tpch_q15_trino.sql new file mode 100644 index 000000000..5481d24fd --- /dev/null +++ b/tests/test_sql_refsols/tpch_q15_trino.sql @@ -0,0 +1,45 @@ +WITH _t3 AS ( + SELECT + l_discount, + l_extendedprice, + l_shipdate, + l_suppkey + FROM tpch.lineitem + WHERE + l_shipdate < CAST('1996-04-01' AS DATE) + AND l_shipdate >= CAST('1996-01-01' AS DATE) +), _t1 AS ( + SELECT + SUM(l_extendedprice * ( + 1 - l_discount + )) AS sum_expr + FROM _t3 + GROUP BY + l_suppkey +), _s0 AS ( + SELECT + MAX(COALESCE(sum_expr, 0)) AS max_total_revenue + FROM _t1 +), _s3 AS ( + SELECT + l_suppkey, + SUM(l_extendedprice * ( + 1 - l_discount + )) AS sum_expr + FROM _t3 + GROUP BY + 1 +) +SELECT + 
supplier.s_suppkey AS S_SUPPKEY, + supplier.s_name AS S_NAME, + supplier.s_address AS S_ADDRESS, + supplier.s_phone AS S_PHONE, + COALESCE(_s3.sum_expr, 0) AS TOTAL_REVENUE +FROM _s0 AS _s0 +CROSS JOIN tpch.supplier AS supplier +JOIN _s3 AS _s3 + ON _s0.max_total_revenue = COALESCE(_s3.sum_expr, 0) + AND _s3.l_suppkey = supplier.s_suppkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q16_trino.sql b/tests/test_sql_refsols/tpch_q16_trino.sql new file mode 100644 index 000000000..4b87594c7 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q16_trino.sql @@ -0,0 +1,24 @@ +SELECT + part.p_brand AS P_BRAND, + part.p_type AS P_TYPE, + part.p_size AS P_SIZE, + COUNT(DISTINCT partsupp.ps_suppkey) AS SUPPLIER_COUNT +FROM tpch.partsupp AS partsupp +JOIN tpch.supplier AS supplier + ON NOT supplier.s_comment LIKE '%Customer%Complaints%' + AND partsupp.ps_suppkey = supplier.s_suppkey +JOIN tpch.part AS part + ON NOT STARTS_WITH(part.p_type, 'MEDIUM POLISHED%') + AND part.p_brand <> 'BRAND#45' + AND part.p_partkey = partsupp.ps_partkey + AND part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9) +GROUP BY + 1, + 2, + 3 +ORDER BY + 4 DESC, + 1 NULLS FIRST, + 2 NULLS FIRST, + 3 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q17_trino.sql b/tests/test_sql_refsols/tpch_q17_trino.sql new file mode 100644 index 000000000..c2eb98159 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q17_trino.sql @@ -0,0 +1,18 @@ +WITH _t AS ( + SELECT + lineitem.l_extendedprice, + lineitem.l_quantity, + AVG(CAST(lineitem.l_quantity AS DOUBLE)) OVER (PARTITION BY lineitem.l_partkey) AS _w + FROM tpch.part AS part + JOIN tpch.lineitem AS lineitem + ON lineitem.l_partkey = part.p_partkey + WHERE + part.p_brand = 'Brand#23' AND part.p_container = 'MED BOX' +) +SELECT + CAST(COALESCE(SUM(l_extendedprice), 0) AS DOUBLE) / 7.0 AS AVG_YEARLY +FROM _t +WHERE + l_quantity < ( + 0.2 * _w + ) diff --git a/tests/test_sql_refsols/tpch_q18_trino.sql b/tests/test_sql_refsols/tpch_q18_trino.sql 
new file mode 100644 index 000000000..e5c458291 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q18_trino.sql @@ -0,0 +1,26 @@ +WITH _t1 AS ( + SELECT + l_orderkey, + SUM(l_quantity) AS sum_l_quantity + FROM tpch.lineitem + GROUP BY + 1 +) +SELECT + customer.c_name AS C_NAME, + customer.c_custkey AS C_CUSTKEY, + orders.o_orderkey AS O_ORDERKEY, + orders.o_orderdate AS O_ORDERDATE, + orders.o_totalprice AS O_TOTALPRICE, + _t1.sum_l_quantity AS TOTAL_QUANTITY +FROM tpch.orders AS orders +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 +ORDER BY + 5 DESC, + 4 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q19_trino.sql b/tests/test_sql_refsols/tpch_q19_trino.sql new file mode 100644 index 000000000..3ea806f1f --- /dev/null +++ b/tests/test_sql_refsols/tpch_q19_trino.sql @@ -0,0 +1,36 @@ +SELECT + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON ( + ( + lineitem.l_quantity <= 11 + AND lineitem.l_quantity >= 1 + AND part.p_brand = 'Brand#12' + AND part.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND part.p_size <= 5 + AND part.p_size >= 1 + ) + OR ( + lineitem.l_quantity <= 20 + AND lineitem.l_quantity >= 10 + AND part.p_brand = 'Brand#23' + AND part.p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG') + AND part.p_size <= 10 + AND part.p_size >= 1 + ) + OR ( + lineitem.l_quantity <= 30 + AND lineitem.l_quantity >= 20 + AND part.p_brand = 'Brand#34' + AND part.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND part.p_size <= 15 + AND part.p_size >= 1 + ) + ) + AND lineitem.l_partkey = part.p_partkey +WHERE + lineitem.l_shipinstruct = 'DELIVER IN PERSON' + AND lineitem.l_shipmode IN ('AIR', 'AIR REG') diff --git a/tests/test_sql_refsols/tpch_q1_trino.sql 
b/tests/test_sql_refsols/tpch_q1_trino.sql new file mode 100644 index 000000000..a060e085e --- /dev/null +++ b/tests/test_sql_refsols/tpch_q1_trino.sql @@ -0,0 +1,26 @@ +SELECT + l_returnflag AS L_RETURNFLAG, + l_linestatus AS L_LINESTATUS, + COALESCE(SUM(l_quantity), 0) AS SUM_QTY, + COALESCE(SUM(l_extendedprice), 0) AS SUM_BASE_PRICE, + COALESCE(SUM(l_extendedprice * ( + 1 - l_discount + )), 0) AS SUM_DISC_PRICE, + COALESCE(SUM(l_extendedprice * ( + 1 - l_discount + ) * ( + 1 + l_tax + )), 0) AS SUM_CHARGE, + AVG(l_quantity) AS AVG_QTY, + AVG(l_extendedprice) AS AVG_PRICE, + AVG(l_discount) AS AVG_DISC, + COUNT(*) AS COUNT_ORDER +FROM tpch.lineitem +WHERE + l_shipdate <= CAST('1998-12-01' AS DATE) +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q20_trino.sql b/tests/test_sql_refsols/tpch_q20_trino.sql new file mode 100644 index 000000000..2576c4a65 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q20_trino.sql @@ -0,0 +1,31 @@ +WITH _s5 AS ( + SELECT + lineitem.l_partkey, + SUM(lineitem.l_quantity) AS sum_l_quantity + FROM tpch.part AS part + JOIN tpch.lineitem AS lineitem + ON YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1994 + AND lineitem.l_partkey = part.p_partkey + WHERE + STARTS_WITH(part.p_name, 'forest') + GROUP BY + 1 +) +SELECT + ARBITRARY(supplier.s_name) AS S_NAME, + ARBITRARY(supplier.s_address) AS S_ADDRESS +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey +JOIN tpch.partsupp AS partsupp + ON partsupp.ps_suppkey = supplier.s_suppkey +JOIN _s5 AS _s5 + ON _s5.l_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +GROUP BY + partsupp.ps_suppkey +ORDER BY + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_trino.sql b/tests/test_sql_refsols/tpch_q21_trino.sql new file mode 100644 index 000000000..2cb5fda72 --- /dev/null +++ 
b/tests/test_sql_refsols/tpch_q21_trino.sql @@ -0,0 +1,63 @@ +WITH _t5 AS ( + SELECT + l_commitdate, + l_linenumber, + l_orderkey, + l_receiptdate, + l_suppkey + FROM tpch.lineitem + WHERE + l_commitdate < l_receiptdate +), _t3 AS ( + SELECT + _t5.l_linenumber, + _t5.l_orderkey, + orders.o_orderkey, + ARBITRARY(_t5.l_suppkey) AS anything_l_suppkey, + ARBITRARY(orders.o_orderstatus) AS anything_o_orderstatus + FROM _t5 AS _t5 + JOIN tpch.orders AS orders + ON _t5.l_orderkey = orders.o_orderkey + JOIN tpch.lineitem AS lineitem + ON _t5.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + GROUP BY + 1, + 2, + 3 +), _u_0 AS ( + SELECT + _t6.l_linenumber AS _u_1, + _t6.l_orderkey AS _u_2 + FROM _t5 AS _t6 + JOIN tpch.lineitem AS lineitem + ON _t6.l_orderkey = lineitem.l_orderkey + AND _t6.l_suppkey <> lineitem.l_suppkey + AND lineitem.l_commitdate < lineitem.l_receiptdate + GROUP BY + 1, + 2 +), _s11 AS ( + SELECT + _t3.anything_l_suppkey + FROM _t3 AS _t3 + LEFT JOIN _u_0 AS _u_0 + ON _t3.l_linenumber = _u_0._u_1 + AND _t3.l_orderkey = _u_0._u_2 + AND _t3.o_orderkey = _u_0._u_2 + WHERE + _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL +) +SELECT + ARBITRARY(supplier.s_name) AS S_NAME, + COUNT(_s11.anything_l_suppkey) AS NUMWAIT +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s11 AS _s11 + ON _s11.anything_l_suppkey = supplier.s_suppkey +GROUP BY + supplier.s_suppkey +ORDER BY + 2 DESC, + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q22_trino.sql b/tests/test_sql_refsols/tpch_q22_trino.sql new file mode 100644 index 000000000..8f8901a3c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q22_trino.sql @@ -0,0 +1,30 @@ +WITH _s0 AS ( + SELECT + AVG(c_acctbal) AS avg_c_acctbal + FROM tpch.customer + WHERE + SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND c_acctbal > 0.0 +), _u_0 AS ( + SELECT 
+ o_custkey AS _u_1 + FROM tpch.orders + GROUP BY + 1 +) +SELECT + SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, + COUNT(*) AS NUM_CUSTS, + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.avg_c_acctbal < customer.c_acctbal +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + _u_0._u_1 IS NULL +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q2_trino.sql b/tests/test_sql_refsols/tpch_q2_trino.sql new file mode 100644 index 000000000..83c0d291c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q2_trino.sql @@ -0,0 +1,41 @@ +WITH _t AS ( + SELECT + nation.n_name, + part.p_mfgr, + part.p_partkey, + supplier.s_acctbal, + supplier.s_address, + supplier.s_comment, + supplier.s_name, + supplier.s_phone, + RANK() OVER (PARTITION BY partsupp.ps_partkey ORDER BY partsupp.ps_supplycost) AS _w + FROM tpch.part AS part + JOIN tpch.partsupp AS partsupp + ON part.p_partkey = partsupp.ps_partkey + JOIN tpch.supplier AS supplier + ON partsupp.ps_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' + WHERE + part.p_size = 15 AND part.p_type LIKE '%BRASS' +) +SELECT + s_acctbal AS S_ACCTBAL, + s_name AS S_NAME, + n_name AS N_NAME, + p_partkey AS P_PARTKEY, + p_mfgr AS P_MFGR, + s_address AS S_ADDRESS, + s_phone AS S_PHONE, + s_comment AS S_COMMENT +FROM _t +WHERE + _w = 1 +ORDER BY + 1 DESC, + 3 NULLS FIRST, + 2 NULLS FIRST, + 4 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q3_trino.sql b/tests/test_sql_refsols/tpch_q3_trino.sql new file mode 100644 index 000000000..a866fbfcf --- /dev/null +++ b/tests/test_sql_refsols/tpch_q3_trino.sql @@ -0,0 +1,24 @@ +SELECT + lineitem.l_orderkey AS L_ORDERKEY, + 
COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE, + orders.o_orderdate AS O_ORDERDATE, + orders.o_shippriority AS O_SHIPPRIORITY +FROM tpch.orders AS orders +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey AND customer.c_mktsegment = 'BUILDING' +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey + AND lineitem.l_shipdate > CAST('1995-03-15' AS DATE) +WHERE + orders.o_orderdate < CAST('1995-03-15' AS DATE) +GROUP BY + 1, + 3, + 4 +ORDER BY + 2 DESC, + 3 NULLS FIRST, + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q4_trino.sql b/tests/test_sql_refsols/tpch_q4_trino.sql new file mode 100644 index 000000000..47b4d8723 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q4_trino.sql @@ -0,0 +1,23 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_commitdate < l_receiptdate + GROUP BY + 1 +) +SELECT + orders.o_orderpriority AS O_ORDERPRIORITY, + COUNT(*) AS ORDER_COUNT +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + MONTH(CAST(orders.o_orderdate AS TIMESTAMP)) IN (7, 8, 9) + AND NOT _u_0._u_1 IS NULL + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1993 +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q5_trino.sql b/tests/test_sql_refsols/tpch_q5_trino.sql new file mode 100644 index 000000000..2b0a5df40 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q5_trino.sql @@ -0,0 +1,30 @@ +WITH _s11 AS ( + SELECT + nation.n_name, + supplier.s_suppkey + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +) +SELECT + ARBITRARY(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON 
customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < CAST('1995-01-01' AS DATE) + AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey +JOIN _s11 AS _s11 + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey +GROUP BY + nation.n_nationkey +ORDER BY + 2 DESC diff --git a/tests/test_sql_refsols/tpch_q6_trino.sql b/tests/test_sql_refsols/tpch_q6_trino.sql new file mode 100644 index 000000000..93d3c085b --- /dev/null +++ b/tests/test_sql_refsols/tpch_q6_trino.sql @@ -0,0 +1,9 @@ +SELECT + COALESCE(SUM(l_extendedprice * l_discount), 0) AS REVENUE +FROM tpch.lineitem +WHERE + l_discount <= 0.07 + AND l_discount >= 0.05 + AND l_quantity < 24 + AND l_shipdate < CAST('1995-01-01' AS DATE) + AND l_shipdate >= CAST('1994-01-01' AS DATE) diff --git a/tests/test_sql_refsols/tpch_q7_trino.sql b/tests/test_sql_refsols/tpch_q7_trino.sql new file mode 100644 index 000000000..439889188 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q7_trino.sql @@ -0,0 +1,46 @@ +WITH _s9 AS ( + SELECT + nation.n_name, + orders.o_orderkey + FROM tpch.orders AS orders + JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey + AND ( + nation.n_name = 'FRANCE' OR nation.n_name = 'GERMANY' + ) +) +SELECT + nation.n_name AS SUPP_NATION, + _s9.n_name AS CUST_NATION, + YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) AS L_YEAR, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN _s9 AS _s9 + ON ( + _s9.n_name = 'FRANCE' OR nation.n_name = 'FRANCE' + ) + AND ( + _s9.n_name = 'GERMANY' OR nation.n_name = 'GERMANY' + 
) + AND _s9.o_orderkey = lineitem.l_orderkey + AND ( + nation.n_name = 'FRANCE' OR nation.n_name = 'GERMANY' + ) +WHERE + YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) IN (1995, 1996) +GROUP BY + 1, + 2, + 3 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST, + 3 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q8_trino.sql b/tests/test_sql_refsols/tpch_q8_trino.sql new file mode 100644 index 000000000..45e9515b0 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q8_trino.sql @@ -0,0 +1,34 @@ +SELECT + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS O_YEAR, + CAST(COALESCE( + SUM( + IF( + nation_2.n_name = 'BRAZIL', + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ), + 0 + ) + ), + 0 + ) AS DOUBLE) / COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS MKT_SHARE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_type = 'ECONOMY ANODIZED STEEL' +JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) IN (1995, 1996) + AND lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation_2 + ON nation_2.n_nationkey = supplier.s_nationkey +GROUP BY + 1 diff --git a/tests/test_sql_refsols/tpch_q9_trino.sql b/tests/test_sql_refsols/tpch_q9_trino.sql new file mode 100644 index 000000000..d71067dc9 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q9_trino.sql @@ -0,0 +1,30 @@ +SELECT + nation.n_name AS NATION, + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS O_YEAR, + COALESCE( + SUM( + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) - partsupp.ps_supplycost * lineitem.l_quantity + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem 
+JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_name LIKE '%green%' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 DESC +LIMIT 10 diff --git a/tests/test_sql_refsols/triple_partition_trino.sql b/tests/test_sql_refsols/triple_partition_trino.sql new file mode 100644 index 000000000..37edb25c0 --- /dev/null +++ b/tests/test_sql_refsols/triple_partition_trino.sql @@ -0,0 +1,72 @@ +WITH _s3 AS ( + SELECT + n_nationkey, + n_regionkey + FROM tpch.nation +), _s5 AS ( + SELECT + r_name, + r_regionkey + FROM tpch.region +), _s14 AS ( + SELECT + orders.o_custkey, + part.p_type, + _s5.r_name, + COUNT(*) AS n_rows + FROM tpch.part AS part + JOIN tpch.lineitem AS lineitem + ON MONTH(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 6 + AND YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1992 + AND lineitem.l_partkey = part.p_partkey + JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey + JOIN _s3 AS _s3 + ON _s3.n_nationkey = supplier.s_nationkey + JOIN _s5 AS _s5 + ON _s3.n_regionkey = _s5.r_regionkey + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1992 + AND lineitem.l_orderkey = orders.o_orderkey + WHERE + STARTS_WITH(part.p_container, 'SM') + GROUP BY + 1, + 2, + 3 +), _t2 AS ( + SELECT + _s13.r_name AS cust_region, + _s14.r_name, + SUM(_s14.n_rows) AS sum_n_rows + FROM _s14 AS _s14 + JOIN tpch.customer AS customer + ON _s14.o_custkey = customer.c_custkey + JOIN _s3 AS _s11 + ON _s11.n_nationkey = customer.c_nationkey + JOIN _s5 AS _s13 + ON _s11.n_regionkey = _s13.r_regionkey + GROUP BY + _s14.p_type, + 1, + 2 +), _t1 AS ( + SELECT + r_name, + 
MAX(sum_n_rows) AS max_sum_n_rows, + SUM(sum_n_rows) AS sum_sum_n_rows + FROM _t2 + GROUP BY + cust_region, + 1 +) +SELECT + r_name AS region, + AVG(( + 100.0 * max_sum_n_rows + ) / sum_sum_n_rows) AS avgpct +FROM _t1 +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/unary_and_slicing_operators_trino.sql b/tests/test_sql_refsols/unary_and_slicing_operators_trino.sql new file mode 100644 index 000000000..b80ccca12 --- /dev/null +++ b/tests/test_sql_refsols/unary_and_slicing_operators_trino.sql @@ -0,0 +1,64 @@ +SELECT + SUBSTRING(c_phone, 1, 3) AS country_code, + SUBSTRING(c_name, 2) AS name_without_first_char, + SUBSTRING( + c_phone, + CASE WHEN ( + LENGTH(c_phone) + 0 + ) < 1 THEN 1 ELSE ( + LENGTH(c_phone) + 0 + ) END + ) AS last_digit, + SUBSTRING( + c_name, + 2, + CASE + WHEN ( + LENGTH(c_name) + 0 + ) < 1 + THEN 0 + ELSE CASE + WHEN ( + ( + LENGTH(c_name) + 0 + ) - 2 + ) <= 0 + THEN 0 + ELSE ( + LENGTH(c_name) + 0 + ) - 2 + END + END + ) AS name_without_start_and_end_char, + SUBSTRING( + c_phone, + 1, + CASE WHEN ( + LENGTH(c_phone) + -5 + ) < 0 THEN 0 ELSE ( + LENGTH(c_phone) + -5 + ) END + ) AS phone_without_last_5_chars, + SUBSTRING( + c_name, + CASE WHEN ( + LENGTH(c_name) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(c_name) + -1 + ) END, + CASE + WHEN ( + LENGTH(c_name) + 0 + ) < 1 + THEN 0 + ELSE ( + LENGTH(c_name) + 0 + ) - CASE WHEN ( + LENGTH(c_name) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(c_name) + -1 + ) END + END + ) AS name_second_to_last_char, + c_acctbal >= 0 AS is_not_in_debt +FROM tpch.customer diff --git a/tests/test_sql_refsols/user_range_collection_1_trino.sql b/tests/test_sql_refsols/user_range_collection_1_trino.sql new file mode 100644 index 000000000..03cb0961f --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_1_trino.sql @@ -0,0 +1,36 @@ +WITH _s1 AS ( + SELECT + p_size, + COUNT(*) AS n_rows + FROM tpch.part + WHERE + p_name LIKE '%turquoise%' + GROUP BY + 1 +) +SELECT + sizes.part_size, + 
COALESCE(_s1.n_rows, 0) AS n_parts +FROM (VALUES + (1), + (6), + (11), + (16), + (21), + (26), + (31), + (36), + (41), + (46), + (51), + (56), + (61), + (66), + (71), + (76), + (81), + (86), + (91), + (96)) AS sizes(part_size) +LEFT JOIN _s1 AS _s1 + ON _s1.p_size = sizes.part_size diff --git a/tests/test_sql_refsols/user_range_collection_2_trino.sql b/tests/test_sql_refsols/user_range_collection_2_trino.sql new file mode 100644 index 000000000..31ae7b97a --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_2_trino.sql @@ -0,0 +1,544 @@ +WITH _s3 AS ( + SELECT + a_2.x, + SUM(CAST(b.y AS VARCHAR) LIKE ( + CONCAT('%', CAST(a_2.x AS VARCHAR)) + )) AS sum_expr, + SUM(STARTS_WITH(CAST(b.y AS VARCHAR), CAST(a_2.x AS VARCHAR))) AS sum_expr_5 + FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS a_2(x) + JOIN (VALUES + (0), + (2), + (4), + (6), + (8), + (10), + (12), + (14), + (16), + (18), + (20), + (22), + (24), + (26), + (28), + (30), + (32), + (34), + (36), + (38), + (40), + (42), + (44), + (46), + (48), + (50), + (52), + (54), + (56), + (58), + (60), + (62), + (64), + (66), + (68), + (70), + (72), + (74), + (76), + (78), + (80), + (82), + (84), + (86), + (88), + (90), + (92), + (94), + (96), + (98), + (100), + (102), + (104), + (106), + (108), + (110), + (112), + (114), + (116), + (118), + (120), + (122), + (124), + (126), + (128), + (130), + (132), + (134), + (136), + (138), + (140), + (142), + (144), + (146), + (148), + (150), + (152), + (154), + (156), + (158), + (160), + (162), + (164), + (166), + (168), + (170), + (172), + (174), + (176), + (178), + (180), + (182), + (184), + (186), + (188), + (190), + (192), + (194), + (196), + (198), + (200), + (202), + (204), + (206), + (208), + (210), + (212), + (214), + (216), + (218), + (220), + (222), + (224), + (226), + (228), + (230), + (232), + (234), + (236), + (238), + (240), + (242), + (244), + (246), + (248), + (250), + (252), + (254), + (256), + (258), + (260), + (262), 
+ (264), + (266), + (268), + (270), + (272), + (274), + (276), + (278), + (280), + (282), + (284), + (286), + (288), + (290), + (292), + (294), + (296), + (298), + (300), + (302), + (304), + (306), + (308), + (310), + (312), + (314), + (316), + (318), + (320), + (322), + (324), + (326), + (328), + (330), + (332), + (334), + (336), + (338), + (340), + (342), + (344), + (346), + (348), + (350), + (352), + (354), + (356), + (358), + (360), + (362), + (364), + (366), + (368), + (370), + (372), + (374), + (376), + (378), + (380), + (382), + (384), + (386), + (388), + (390), + (392), + (394), + (396), + (398), + (400), + (402), + (404), + (406), + (408), + (410), + (412), + (414), + (416), + (418), + (420), + (422), + (424), + (426), + (428), + (430), + (432), + (434), + (436), + (438), + (440), + (442), + (444), + (446), + (448), + (450), + (452), + (454), + (456), + (458), + (460), + (462), + (464), + (466), + (468), + (470), + (472), + (474), + (476), + (478), + (480), + (482), + (484), + (486), + (488), + (490), + (492), + (494), + (496), + (498), + (500), + (502), + (504), + (506), + (508), + (510), + (512), + (514), + (516), + (518), + (520), + (522), + (524), + (526), + (528), + (530), + (532), + (534), + (536), + (538), + (540), + (542), + (544), + (546), + (548), + (550), + (552), + (554), + (556), + (558), + (560), + (562), + (564), + (566), + (568), + (570), + (572), + (574), + (576), + (578), + (580), + (582), + (584), + (586), + (588), + (590), + (592), + (594), + (596), + (598), + (600), + (602), + (604), + (606), + (608), + (610), + (612), + (614), + (616), + (618), + (620), + (622), + (624), + (626), + (628), + (630), + (632), + (634), + (636), + (638), + (640), + (642), + (644), + (646), + (648), + (650), + (652), + (654), + (656), + (658), + (660), + (662), + (664), + (666), + (668), + (670), + (672), + (674), + (676), + (678), + (680), + (682), + (684), + (686), + (688), + (690), + (692), + (694), + (696), + (698), + (700), + (702), + (704), + (706), + 
(708), + (710), + (712), + (714), + (716), + (718), + (720), + (722), + (724), + (726), + (728), + (730), + (732), + (734), + (736), + (738), + (740), + (742), + (744), + (746), + (748), + (750), + (752), + (754), + (756), + (758), + (760), + (762), + (764), + (766), + (768), + (770), + (772), + (774), + (776), + (778), + (780), + (782), + (784), + (786), + (788), + (790), + (792), + (794), + (796), + (798), + (800), + (802), + (804), + (806), + (808), + (810), + (812), + (814), + (816), + (818), + (820), + (822), + (824), + (826), + (828), + (830), + (832), + (834), + (836), + (838), + (840), + (842), + (844), + (846), + (848), + (850), + (852), + (854), + (856), + (858), + (860), + (862), + (864), + (866), + (868), + (870), + (872), + (874), + (876), + (878), + (880), + (882), + (884), + (886), + (888), + (890), + (892), + (894), + (896), + (898), + (900), + (902), + (904), + (906), + (908), + (910), + (912), + (914), + (916), + (918), + (920), + (922), + (924), + (926), + (928), + (930), + (932), + (934), + (936), + (938), + (940), + (942), + (944), + (946), + (948), + (950), + (952), + (954), + (956), + (958), + (960), + (962), + (964), + (966), + (968), + (970), + (972), + (974), + (976), + (978), + (980), + (982), + (984), + (986), + (988), + (990), + (992), + (994), + (996), + (998), + (1000)) AS b(y) + ON CAST(b.y AS VARCHAR) LIKE CONCAT('%', CAST(a_2.x AS VARCHAR)) + OR STARTS_WITH(CAST(b.y AS VARCHAR), CAST(a_2.x AS VARCHAR)) + GROUP BY + 1 +) +SELECT + a.x, + COALESCE(_s3.sum_expr_5, 0) AS n_prefix, + COALESCE(_s3.sum_expr, 0) AS n_suffix +FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS a(x) +LEFT JOIN _s3 AS _s3 + ON _s3.x = a.x +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/user_range_collection_3_trino.sql b/tests/test_sql_refsols/user_range_collection_3_trino.sql new file mode 100644 index 000000000..b4dccb2f5 --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_3_trino.sql @@ -0,0 
+1,1045 @@ +WITH _s5 AS ( + SELECT + a_2.x, + COUNT(*) AS n_rows + FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS a_2(x) + JOIN (VALUES + (0), + (2), + (4), + (6), + (8), + (10), + (12), + (14), + (16), + (18), + (20), + (22), + (24), + (26), + (28), + (30), + (32), + (34), + (36), + (38), + (40), + (42), + (44), + (46), + (48), + (50), + (52), + (54), + (56), + (58), + (60), + (62), + (64), + (66), + (68), + (70), + (72), + (74), + (76), + (78), + (80), + (82), + (84), + (86), + (88), + (90), + (92), + (94), + (96), + (98), + (100), + (102), + (104), + (106), + (108), + (110), + (112), + (114), + (116), + (118), + (120), + (122), + (124), + (126), + (128), + (130), + (132), + (134), + (136), + (138), + (140), + (142), + (144), + (146), + (148), + (150), + (152), + (154), + (156), + (158), + (160), + (162), + (164), + (166), + (168), + (170), + (172), + (174), + (176), + (178), + (180), + (182), + (184), + (186), + (188), + (190), + (192), + (194), + (196), + (198), + (200), + (202), + (204), + (206), + (208), + (210), + (212), + (214), + (216), + (218), + (220), + (222), + (224), + (226), + (228), + (230), + (232), + (234), + (236), + (238), + (240), + (242), + (244), + (246), + (248), + (250), + (252), + (254), + (256), + (258), + (260), + (262), + (264), + (266), + (268), + (270), + (272), + (274), + (276), + (278), + (280), + (282), + (284), + (286), + (288), + (290), + (292), + (294), + (296), + (298), + (300), + (302), + (304), + (306), + (308), + (310), + (312), + (314), + (316), + (318), + (320), + (322), + (324), + (326), + (328), + (330), + (332), + (334), + (336), + (338), + (340), + (342), + (344), + (346), + (348), + (350), + (352), + (354), + (356), + (358), + (360), + (362), + (364), + (366), + (368), + (370), + (372), + (374), + (376), + (378), + (380), + (382), + (384), + (386), + (388), + (390), + (392), + (394), + (396), + (398), + (400), + (402), + (404), + (406), + (408), + (410), + (412), + (414), + 
(416), + (418), + (420), + (422), + (424), + (426), + (428), + (430), + (432), + (434), + (436), + (438), + (440), + (442), + (444), + (446), + (448), + (450), + (452), + (454), + (456), + (458), + (460), + (462), + (464), + (466), + (468), + (470), + (472), + (474), + (476), + (478), + (480), + (482), + (484), + (486), + (488), + (490), + (492), + (494), + (496), + (498), + (500), + (502), + (504), + (506), + (508), + (510), + (512), + (514), + (516), + (518), + (520), + (522), + (524), + (526), + (528), + (530), + (532), + (534), + (536), + (538), + (540), + (542), + (544), + (546), + (548), + (550), + (552), + (554), + (556), + (558), + (560), + (562), + (564), + (566), + (568), + (570), + (572), + (574), + (576), + (578), + (580), + (582), + (584), + (586), + (588), + (590), + (592), + (594), + (596), + (598), + (600), + (602), + (604), + (606), + (608), + (610), + (612), + (614), + (616), + (618), + (620), + (622), + (624), + (626), + (628), + (630), + (632), + (634), + (636), + (638), + (640), + (642), + (644), + (646), + (648), + (650), + (652), + (654), + (656), + (658), + (660), + (662), + (664), + (666), + (668), + (670), + (672), + (674), + (676), + (678), + (680), + (682), + (684), + (686), + (688), + (690), + (692), + (694), + (696), + (698), + (700), + (702), + (704), + (706), + (708), + (710), + (712), + (714), + (716), + (718), + (720), + (722), + (724), + (726), + (728), + (730), + (732), + (734), + (736), + (738), + (740), + (742), + (744), + (746), + (748), + (750), + (752), + (754), + (756), + (758), + (760), + (762), + (764), + (766), + (768), + (770), + (772), + (774), + (776), + (778), + (780), + (782), + (784), + (786), + (788), + (790), + (792), + (794), + (796), + (798), + (800), + (802), + (804), + (806), + (808), + (810), + (812), + (814), + (816), + (818), + (820), + (822), + (824), + (826), + (828), + (830), + (832), + (834), + (836), + (838), + (840), + (842), + (844), + (846), + (848), + (850), + (852), + (854), + (856), + (858), + 
(860), + (862), + (864), + (866), + (868), + (870), + (872), + (874), + (876), + (878), + (880), + (882), + (884), + (886), + (888), + (890), + (892), + (894), + (896), + (898), + (900), + (902), + (904), + (906), + (908), + (910), + (912), + (914), + (916), + (918), + (920), + (922), + (924), + (926), + (928), + (930), + (932), + (934), + (936), + (938), + (940), + (942), + (944), + (946), + (948), + (950), + (952), + (954), + (956), + (958), + (960), + (962), + (964), + (966), + (968), + (970), + (972), + (974), + (976), + (978), + (980), + (982), + (984), + (986), + (988), + (990), + (992), + (994), + (996), + (998), + (1000)) AS b_2(y) + ON CAST(b_2.y AS VARCHAR) LIKE CONCAT('%', CAST(a_2.x AS VARCHAR)) + GROUP BY + 1 +) +SELECT + a.x, + COUNT(*) AS n_prefix, + ARBITRARY(_s5.n_rows) AS n_suffix +FROM (VALUES + (0), + (1), + (2), + (3), + (4), + (5), + (6), + (7), + (8), + (9)) AS a(x) +JOIN (VALUES + (0), + (2), + (4), + (6), + (8), + (10), + (12), + (14), + (16), + (18), + (20), + (22), + (24), + (26), + (28), + (30), + (32), + (34), + (36), + (38), + (40), + (42), + (44), + (46), + (48), + (50), + (52), + (54), + (56), + (58), + (60), + (62), + (64), + (66), + (68), + (70), + (72), + (74), + (76), + (78), + (80), + (82), + (84), + (86), + (88), + (90), + (92), + (94), + (96), + (98), + (100), + (102), + (104), + (106), + (108), + (110), + (112), + (114), + (116), + (118), + (120), + (122), + (124), + (126), + (128), + (130), + (132), + (134), + (136), + (138), + (140), + (142), + (144), + (146), + (148), + (150), + (152), + (154), + (156), + (158), + (160), + (162), + (164), + (166), + (168), + (170), + (172), + (174), + (176), + (178), + (180), + (182), + (184), + (186), + (188), + (190), + (192), + (194), + (196), + (198), + (200), + (202), + (204), + (206), + (208), + (210), + (212), + (214), + (216), + (218), + (220), + (222), + (224), + (226), + (228), + (230), + (232), + (234), + (236), + (238), + (240), + (242), + (244), + (246), + (248), + (250), + 
(252), + (254), + (256), + (258), + (260), + (262), + (264), + (266), + (268), + (270), + (272), + (274), + (276), + (278), + (280), + (282), + (284), + (286), + (288), + (290), + (292), + (294), + (296), + (298), + (300), + (302), + (304), + (306), + (308), + (310), + (312), + (314), + (316), + (318), + (320), + (322), + (324), + (326), + (328), + (330), + (332), + (334), + (336), + (338), + (340), + (342), + (344), + (346), + (348), + (350), + (352), + (354), + (356), + (358), + (360), + (362), + (364), + (366), + (368), + (370), + (372), + (374), + (376), + (378), + (380), + (382), + (384), + (386), + (388), + (390), + (392), + (394), + (396), + (398), + (400), + (402), + (404), + (406), + (408), + (410), + (412), + (414), + (416), + (418), + (420), + (422), + (424), + (426), + (428), + (430), + (432), + (434), + (436), + (438), + (440), + (442), + (444), + (446), + (448), + (450), + (452), + (454), + (456), + (458), + (460), + (462), + (464), + (466), + (468), + (470), + (472), + (474), + (476), + (478), + (480), + (482), + (484), + (486), + (488), + (490), + (492), + (494), + (496), + (498), + (500), + (502), + (504), + (506), + (508), + (510), + (512), + (514), + (516), + (518), + (520), + (522), + (524), + (526), + (528), + (530), + (532), + (534), + (536), + (538), + (540), + (542), + (544), + (546), + (548), + (550), + (552), + (554), + (556), + (558), + (560), + (562), + (564), + (566), + (568), + (570), + (572), + (574), + (576), + (578), + (580), + (582), + (584), + (586), + (588), + (590), + (592), + (594), + (596), + (598), + (600), + (602), + (604), + (606), + (608), + (610), + (612), + (614), + (616), + (618), + (620), + (622), + (624), + (626), + (628), + (630), + (632), + (634), + (636), + (638), + (640), + (642), + (644), + (646), + (648), + (650), + (652), + (654), + (656), + (658), + (660), + (662), + (664), + (666), + (668), + (670), + (672), + (674), + (676), + (678), + (680), + (682), + (684), + (686), + (688), + (690), + (692), + (694), + 
(696), + (698), + (700), + (702), + (704), + (706), + (708), + (710), + (712), + (714), + (716), + (718), + (720), + (722), + (724), + (726), + (728), + (730), + (732), + (734), + (736), + (738), + (740), + (742), + (744), + (746), + (748), + (750), + (752), + (754), + (756), + (758), + (760), + (762), + (764), + (766), + (768), + (770), + (772), + (774), + (776), + (778), + (780), + (782), + (784), + (786), + (788), + (790), + (792), + (794), + (796), + (798), + (800), + (802), + (804), + (806), + (808), + (810), + (812), + (814), + (816), + (818), + (820), + (822), + (824), + (826), + (828), + (830), + (832), + (834), + (836), + (838), + (840), + (842), + (844), + (846), + (848), + (850), + (852), + (854), + (856), + (858), + (860), + (862), + (864), + (866), + (868), + (870), + (872), + (874), + (876), + (878), + (880), + (882), + (884), + (886), + (888), + (890), + (892), + (894), + (896), + (898), + (900), + (902), + (904), + (906), + (908), + (910), + (912), + (914), + (916), + (918), + (920), + (922), + (924), + (926), + (928), + (930), + (932), + (934), + (936), + (938), + (940), + (942), + (944), + (946), + (948), + (950), + (952), + (954), + (956), + (958), + (960), + (962), + (964), + (966), + (968), + (970), + (972), + (974), + (976), + (978), + (980), + (982), + (984), + (986), + (988), + (990), + (992), + (994), + (996), + (998), + (1000)) AS b(y) + ON STARTS_WITH(CAST(b.y AS VARCHAR), CAST(a.x AS VARCHAR)) +JOIN _s5 AS _s5 + ON _s5.x = a.x +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/user_range_collection_4_trino.sql b/tests/test_sql_refsols/user_range_collection_4_trino.sql new file mode 100644 index 000000000..3f96d4b3e --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_4_trino.sql @@ -0,0 +1,32 @@ +WITH _t AS ( + SELECT + part.p_name, + part.p_retailprice, + sizes.part_size, + ROW_NUMBER() OVER (PARTITION BY sizes.part_size ORDER BY part.p_retailprice) AS _w + FROM (VALUES + (1), + (2), + (3), + (4), + 
(5), + (6), + (7), + (8), + (9), + (10)) AS sizes(part_size) + JOIN tpch.part AS part + ON part.p_container LIKE '%SM DRUM%' + AND part.p_name LIKE '%azure%' + AND part.p_size = sizes.part_size + AND part.p_type LIKE '%PLATED%' +) +SELECT + part_size, + p_name AS name, + p_retailprice AS retail_price +FROM _t +WHERE + _w = 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/user_range_collection_5_trino.sql b/tests/test_sql_refsols/user_range_collection_5_trino.sql new file mode 100644 index 000000000..08fcc4b21 --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_5_trino.sql @@ -0,0 +1,44 @@ +WITH _s3 AS ( + SELECT + sizes_2.part_size, + COUNT(*) AS n_rows + FROM (VALUES + (1), + (6), + (11), + (16), + (21), + (26), + (31), + (36), + (41), + (46), + (51), + (56)) AS sizes_2(part_size) + JOIN tpch.part AS part + ON part.p_name LIKE '%almond%' + AND part.p_size <= ( + sizes_2.part_size + 4 + ) + AND part.p_size >= sizes_2.part_size + GROUP BY + 1 +) +SELECT + sizes.part_size, + COALESCE(_s3.n_rows, 0) AS n_parts +FROM (VALUES + (1), + (6), + (11), + (16), + (21), + (26), + (31), + (36), + (41), + (46), + (51), + (56)) AS sizes(part_size) +LEFT JOIN _s3 AS _s3 + ON _s3.part_size = sizes.part_size diff --git a/tests/test_sql_refsols/user_range_collection_6_trino.sql b/tests/test_sql_refsols/user_range_collection_6_trino.sql new file mode 100644 index 000000000..ef61e5198 --- /dev/null +++ b/tests/test_sql_refsols/user_range_collection_6_trino.sql @@ -0,0 +1,33 @@ +WITH _s5 AS ( + SELECT + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + COUNT(DISTINCT orders.o_custkey) AS ndistinct_o_custkey + FROM tpch.orders AS orders + JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey AND customer.c_mktsegment = 'AUTOMOBILE' + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'JAPAN' + WHERE + orders.o_clerk = 'Clerk#000000925' + GROUP BY + 1 +) +SELECT + years.year, + 
COALESCE(_s5.ndistinct_o_custkey, 0) AS n_orders +FROM (VALUES + (1990), + (1991), + (1992), + (1993), + (1994), + (1995), + (1996), + (1997), + (1998), + (1999), + (2000)) AS years(year) +LEFT JOIN _s5 AS _s5 + ON _s5.year_o_orderdate = years.year +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/wealthiest_supplier_trino.sql b/tests/test_sql_refsols/wealthiest_supplier_trino.sql new file mode 100644 index 000000000..89322f05e --- /dev/null +++ b/tests/test_sql_refsols/wealthiest_supplier_trino.sql @@ -0,0 +1,13 @@ +WITH _t AS ( + SELECT + s_acctbal, + s_name, + ROW_NUMBER() OVER (ORDER BY s_acctbal DESC NULLS FIRST, s_name) AS _w + FROM tpch.supplier +) +SELECT + s_name AS name, + s_acctbal AS account_balance +FROM _t +WHERE + _w = 1 diff --git a/tests/test_sql_refsols/week_offset_trino.sql b/tests/test_sql_refsols/week_offset_trino.sql new file mode 100644 index 000000000..37ba57e73 --- /dev/null +++ b/tests/test_sql_refsols/week_offset_trino.sql @@ -0,0 +1,14 @@ +SELECT + sbtxdatetime AS date_time, + DATE_ADD('WEEK', 1, CAST(sbtxdatetime AS TIMESTAMP)) AS week_adj1, + DATE_ADD('WEEK', -1, CAST(sbtxdatetime AS TIMESTAMP)) AS week_adj2, + DATE_ADD('WEEK', 2, DATE_ADD('HOUR', 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj3, + DATE_ADD('WEEK', 2, DATE_ADD('SECOND', -1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj4, + DATE_ADD('WEEK', 2, DATE_ADD('DAY', 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj5, + DATE_ADD('WEEK', 2, DATE_ADD('MINUTE', -1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj6, + DATE_ADD('WEEK', 2, DATE_ADD('MONTH', 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj7, + DATE_ADD('WEEK', 2, DATE_ADD('YEAR', 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj8 +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/window_filter_order_10_trino.sql b/tests/test_sql_refsols/window_filter_order_10_trino.sql new file mode 100644 index 
000000000..2740b81dd --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_10_trino.sql @@ -0,0 +1,25 @@ +WITH _u_0 AS ( + SELECT + c_custkey AS _u_1 + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' + GROUP BY + 1 +), _t AS ( + SELECT + orders.o_totalprice, + AVG(CAST(NULL AS INTEGER)) OVER () AS _w + FROM tpch.orders AS orders + LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_custkey + WHERE + _u_0._u_1 IS NULL AND orders.o_clerk = 'Clerk#000000001' +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + o_totalprice < ( + 0.05 * _w + ) diff --git a/tests/test_sql_refsols/window_filter_order_1_trino.sql b/tests/test_sql_refsols/window_filter_order_1_trino.sql new file mode 100644 index 000000000..02aecec12 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_1_trino.sql @@ -0,0 +1,27 @@ +WITH _s3 AS ( + SELECT + o_custkey + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1992 +), _t2 AS ( + SELECT + COUNT(_s3.o_custkey) AS count_o_custkey + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'GERMANY' + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + customer.c_custkey +), _t AS ( + SELECT + count_o_custkey, + AVG(CAST(COALESCE(count_o_custkey, 0) AS DOUBLE)) OVER () AS _w + FROM _t2 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NULLIF(count_o_custkey, 0) <> 0 AND _w > COALESCE(count_o_custkey, 0) diff --git a/tests/test_sql_refsols/window_filter_order_2_trino.sql b/tests/test_sql_refsols/window_filter_order_2_trino.sql new file mode 100644 index 000000000..02aecec12 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_2_trino.sql @@ -0,0 +1,27 @@ +WITH _s3 AS ( + SELECT + o_custkey + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1992 +), _t2 AS ( + SELECT + COUNT(_s3.o_custkey) AS count_o_custkey + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = 
nation.n_nationkey AND nation.n_name = 'GERMANY' + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + customer.c_custkey +), _t AS ( + SELECT + count_o_custkey, + AVG(CAST(COALESCE(count_o_custkey, 0) AS DOUBLE)) OVER () AS _w + FROM _t2 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NULLIF(count_o_custkey, 0) <> 0 AND _w > COALESCE(count_o_custkey, 0) diff --git a/tests/test_sql_refsols/window_filter_order_3_trino.sql b/tests/test_sql_refsols/window_filter_order_3_trino.sql new file mode 100644 index 000000000..02aecec12 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_3_trino.sql @@ -0,0 +1,27 @@ +WITH _s3 AS ( + SELECT + o_custkey + FROM tpch.orders + WHERE + YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1992 +), _t2 AS ( + SELECT + COUNT(_s3.o_custkey) AS count_o_custkey + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'GERMANY' + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + customer.c_custkey +), _t AS ( + SELECT + count_o_custkey, + AVG(CAST(COALESCE(count_o_custkey, 0) AS DOUBLE)) OVER () AS _w + FROM _t2 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NULLIF(count_o_custkey, 0) <> 0 AND _w > COALESCE(count_o_custkey, 0) diff --git a/tests/test_sql_refsols/window_filter_order_4_trino.sql b/tests/test_sql_refsols/window_filter_order_4_trino.sql new file mode 100644 index 000000000..421117119 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_4_trino.sql @@ -0,0 +1,22 @@ +WITH _t2 AS ( + SELECT + COUNT(*) AS n_rows + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'GERMANY' + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1992 + AND customer.c_custkey = orders.o_custkey + GROUP BY + orders.o_custkey +), _t AS ( + SELECT + n_rows, + AVG(CAST(n_rows AS DOUBLE)) OVER () AS _w + FROM _t2 +) +SELECT + COUNT(*) AS 
n +FROM _t +WHERE + _w > n_rows diff --git a/tests/test_sql_refsols/window_filter_order_5_trino.sql b/tests/test_sql_refsols/window_filter_order_5_trino.sql new file mode 100644 index 000000000..cbd22c1bd --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_5_trino.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + 1 AS expr_0, + c_acctbal, + c_custkey + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +), _t AS ( + SELECT + _s1.c_acctbal, + _s1.expr_0, + AVG(CAST(COALESCE(_s1.c_acctbal, 0) AS DOUBLE)) OVER () AS _w + FROM tpch.orders AS orders + LEFT JOIN _s1 AS _s1 + ON _s1.c_custkey = orders.o_custkey + WHERE + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1995 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NOT expr_0 IS NULL AND _w > c_acctbal diff --git a/tests/test_sql_refsols/window_filter_order_6_trino.sql b/tests/test_sql_refsols/window_filter_order_6_trino.sql new file mode 100644 index 000000000..cbd22c1bd --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_6_trino.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + 1 AS expr_0, + c_acctbal, + c_custkey + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +), _t AS ( + SELECT + _s1.c_acctbal, + _s1.expr_0, + AVG(CAST(COALESCE(_s1.c_acctbal, 0) AS DOUBLE)) OVER () AS _w + FROM tpch.orders AS orders + LEFT JOIN _s1 AS _s1 + ON _s1.c_custkey = orders.o_custkey + WHERE + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1995 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NOT expr_0 IS NULL AND _w > c_acctbal diff --git a/tests/test_sql_refsols/window_filter_order_7_trino.sql b/tests/test_sql_refsols/window_filter_order_7_trino.sql new file mode 100644 index 000000000..ba4dde684 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_7_trino.sql @@ -0,0 +1,15 @@ +WITH _t AS ( + SELECT + customer.c_acctbal, + AVG(CAST(customer.c_acctbal AS DOUBLE)) OVER () AS _w + FROM tpch.orders AS orders + JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey AND 
customer.c_mktsegment = 'BUILDING' + WHERE + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1995 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + _w > c_acctbal diff --git a/tests/test_sql_refsols/window_filter_order_8_trino.sql b/tests/test_sql_refsols/window_filter_order_8_trino.sql new file mode 100644 index 000000000..24c98f1c3 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_8_trino.sql @@ -0,0 +1,30 @@ +WITH _s3 AS ( + SELECT + o_custkey + FROM tpch.orders + WHERE + MONTH(CAST(o_orderdate AS TIMESTAMP)) = 1 + AND YEAR(CAST(o_orderdate AS TIMESTAMP)) = 1998 +), _t2 AS ( + SELECT + ARBITRARY(customer.c_acctbal) AS anything_c_acctbal, + COUNT(_s3.o_custkey) AS count_o_custkey + FROM tpch.customer AS customer + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey AND nation.n_name = 'FRANCE' + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + customer.c_custkey +), _t AS ( + SELECT + anything_c_acctbal, + count_o_custkey, + SUM(COALESCE(count_o_custkey, 0)) OVER () AS _w + FROM _t2 +) +SELECT + COUNT(*) AS n +FROM _t +WHERE + NULLIF(count_o_custkey, 0) IS NULL AND _w > anything_c_acctbal diff --git a/tests/test_sql_refsols/window_filter_order_9_trino.sql b/tests/test_sql_refsols/window_filter_order_9_trino.sql new file mode 100644 index 000000000..900dbc476 --- /dev/null +++ b/tests/test_sql_refsols/window_filter_order_9_trino.sql @@ -0,0 +1,30 @@ +WITH _s3 AS ( + SELECT + 1 AS expr_0, + COALESCE(SUM(orders.o_totalprice), 0) AS total_spent, + customer.c_custkey + FROM tpch.customer AS customer + LEFT JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + WHERE + customer.c_mktsegment = 'BUILDING' + GROUP BY + 3 +), _t AS ( + SELECT + _s3.expr_0, + orders.o_totalprice, + AVG(CAST(_s3.total_spent AS DOUBLE)) OVER () AS _w + FROM tpch.orders AS orders + LEFT JOIN _s3 AS _s3 + ON _s3.c_custkey = orders.o_custkey + WHERE + orders.o_clerk = 'Clerk#000000001' +) +SELECT + COUNT(*) AS n +FROM _t 
+WHERE + expr_0 IS NULL AND o_totalprice < ( + 0.05 * _w + ) diff --git a/tests/test_sql_refsols/window_functions_trino.sql b/tests/test_sql_refsols/window_functions_trino.sql new file mode 100644 index 000000000..68c0ba536 --- /dev/null +++ b/tests/test_sql_refsols/window_functions_trino.sql @@ -0,0 +1,13 @@ +SELECT + DENSE_RANK() OVER (ORDER BY customer.c_acctbal DESC NULLS FIRST) AS rank_value, + NTILE(10) OVER (ORDER BY customer.c_acctbal) AS precentile_value, + LAG(customer.c_acctbal, 2, 0.0) OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal) AS two_prev_value, + LEAD(customer.c_acctbal, 2) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal) AS two_next_value, + SUM(customer.c_acctbal) OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS relsum_value, + SUM(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS relsum_value2, + CAST(customer.c_acctbal AS DOUBLE) / AVG(CAST(customer.c_acctbal AS DOUBLE)) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, + CAST(customer.c_acctbal AS DOUBLE) / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, + CAST(customer.c_acctbal AS DOUBLE) / COUNT(*) OVER () AS relsize_value +FROM tpch.nation AS nation +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_trino.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_trino.sql new file mode 100644 index 000000000..3a3e94a30 --- /dev/null +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_trino.sql @@ -0,0 +1,14 @@ +SELECT + sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 4 
PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbtransaction +ORDER BY + sbtxdatetime NULLS FIRST +LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_trino.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_trino.sql new file mode 100644 index 000000000..cc7d2b368 --- /dev/null +++ b/tests/test_sql_refsols/window_sliding_frame_relsum_trino.sql @@ -0,0 +1,14 @@ +SELECT + sbtxid AS transaction_id, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtxshares) OVER (ORDER BY sbtxdatetime, sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtxshares) OVER (PARTITION BY sbtxcustid ORDER BY sbtxdatetime, 
sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbtransaction +ORDER BY + sbtxdatetime NULLS FIRST +LIMIT 8 diff --git a/tests/test_sql_refsols/year_month_nation_orders_trino.sql b/tests/test_sql_refsols/year_month_nation_orders_trino.sql new file mode 100644 index 000000000..05e57ac62 --- /dev/null +++ b/tests/test_sql_refsols/year_month_nation_orders_trino.sql @@ -0,0 +1,21 @@ +SELECT + nation.n_name AS nation_name, + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS order_year, + MONTH(CAST(orders.o_orderdate AS TIMESTAMP)) AS order_month, + COUNT(*) AS n_orders +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_orderpriority = '1-URGENT' +WHERE + region.r_name IN ('ASIA', 'AFRICA') +GROUP BY + 1, + 2, + 3 +ORDER BY + 4 DESC +LIMIT 5 diff --git a/tests/test_sql_refsols/yoy_change_in_num_orders_trino.sql b/tests/test_sql_refsols/yoy_change_in_num_orders_trino.sql new file mode 100644 index 000000000..d09c9e409 --- /dev/null +++ b/tests/test_sql_refsols/yoy_change_in_num_orders_trino.sql @@ -0,0 +1,17 @@ +WITH _t0 AS ( + SELECT + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_o_orderdate, + COUNT(*) AS n_rows + FROM tpch.orders + GROUP BY + 1 +) +SELECT + year_o_orderdate AS year, + n_rows AS current_year_orders, + ( + 100.0 * CAST(n_rows - LAG(n_rows, 1) OVER (ORDER BY year_o_orderdate) AS DOUBLE) + ) / LAG(n_rows, 1) OVER (ORDER BY year_o_orderdate) AS pct_change +FROM _t0 +ORDER BY + 1 NULLS FIRST