diff --git a/conda/rapids-gpu-bdb-dask-sql.yml b/conda/rapids-gpu-bdb-dask-sql.yml new file mode 100755 index 00000000..54156f8c --- /dev/null +++ b/conda/rapids-gpu-bdb-dask-sql.yml @@ -0,0 +1,33 @@ +channels: + - rapidsai-nightly + - nvidia + - conda-forge + +dependencies: + - python=3.8 + - cudatoolkit=11.2 + - cudf + - rmm + - dask-cuda + - dask-cudf + - cuml + - dask + - distributed + - ucx-py + - ucx-proc=*=gpu + - dask-sql>=2022.1 + - numba=0.54.* + - scipy + - scikit-learn + - cupy + - spacy=2.3 + - oauth2client + - asyncssh + - psutil + - ipykernel + - jupyterlab + - gspread + - pytest + - pip + - pip: + - jupyter-server-proxy diff --git a/gpu_bdb/bdb_tools/__init__.py b/gpu_bdb/bdb_tools/__init__.py index ccbb1625..2b586df8 100755 --- a/gpu_bdb/bdb_tools/__init__.py +++ b/gpu_bdb/bdb_tools/__init__.py @@ -1 +1,4 @@ # Copyright (c) 2020, NVIDIA CORPORATION. + +from .rmm_monitor import RMMResourceMonitor +from .dasktasklogger import DaskTaskLogger diff --git a/gpu_bdb/bdb_tools/cluster_startup.py b/gpu_bdb/bdb_tools/cluster_startup.py index e90e2737..02e56640 100755 --- a/gpu_bdb/bdb_tools/cluster_startup.py +++ b/gpu_bdb/bdb_tools/cluster_startup.py @@ -24,43 +24,13 @@ from dask.utils import parse_bytes -def get_bsql_config_options(): - """Loads configuration environment variables. - In case it is not previously set, returns a default value for each one. - - Returns a dictionary object. 
- For more info: https://docs.blazingdb.com/docs/config_options - """ - config_options = {} - config_options['JOIN_PARTITION_SIZE_THRESHOLD'] = os.environ.get("JOIN_PARTITION_SIZE_THRESHOLD", 300000000) - config_options['MAX_DATA_LOAD_CONCAT_CACHE_BYTE_SIZE'] = os.environ.get("MAX_DATA_LOAD_CONCAT_CACHE_BYTE_SIZE", 400000000) - config_options['BLAZING_DEVICE_MEM_CONSUMPTION_THRESHOLD'] = os.environ.get("BLAZING_DEVICE_MEM_CONSUMPTION_THRESHOLD", 0.6) - config_options['BLAZ_HOST_MEM_CONSUMPTION_THRESHOLD'] = os.environ.get("BLAZ_HOST_MEM_CONSUMPTION_THRESHOLD", 0.6) - config_options['MAX_KERNEL_RUN_THREADS'] = os.environ.get("MAX_KERNEL_RUN_THREADS", 3) - config_options['TABLE_SCAN_KERNEL_NUM_THREADS'] = os.environ.get("TABLE_SCAN_KERNEL_NUM_THREADS", 1) - config_options['MAX_NUM_ORDER_BY_PARTITIONS_PER_NODE'] = os.environ.get("MAX_NUM_ORDER_BY_PARTITIONS_PER_NODE", 20) - config_options['NUM_BYTES_PER_ORDER_BY_PARTITION'] = os.environ.get("NUM_BYTES_PER_ORDER_BY_PARTITION", 400000000) - config_options['MAX_ORDER_BY_SAMPLES_PER_NODE'] = os.environ.get("MAX_ORDER_BY_SAMPLES_PER_NODE", 10000) - config_options['MAX_SEND_MESSAGE_THREADS'] = os.environ.get("MAX_SEND_MESSAGE_THREADS", 20) - config_options['MEMORY_MONITOR_PERIOD'] = os.environ.get("MEMORY_MONITOR_PERIOD", 50) - config_options['TRANSPORT_BUFFER_BYTE_SIZE'] = os.environ.get("TRANSPORT_BUFFER_BYTE_SIZE", 1048576) # 1 MBs - config_options['TRANSPORT_POOL_NUM_BUFFERS'] = os.environ.get("TRANSPORT_POOL_NUM_BUFFERS", 1000) - config_options['BLAZING_LOGGING_DIRECTORY'] = os.environ.get("BLAZING_LOGGING_DIRECTORY", 'blazing_log') - config_options['BLAZING_CACHE_DIRECTORY'] = os.environ.get("BLAZING_CACHE_DIRECTORY", '/tmp/') - config_options['LOGGING_LEVEL'] = os.environ.get("LOGGING_LEVEL", "trace") - config_options['MAX_JOIN_SCATTER_MEM_OVERHEAD'] = os.environ.get("MAX_JOIN_SCATTER_MEM_OVERHEAD", 500000000) - config_options['PROTOCOL'] = os.environ.get("PROTOCOL", "AUTO") - - return config_options - - -def 
attach_to_cluster(config, create_blazing_context=False): +def attach_to_cluster(config, create_sql_context=False): """Attaches to an existing cluster if available. By default, tries to attach to a cluster running on localhost:8786 (dask's default). This is currently hardcoded to assume the dashboard is running on port 8787. - Optionally, this will also create a BlazingContext. + Optionally, this will also create a Dask-SQL Context. """ scheduler_file = config.get("scheduler_file_path") host = config.get("cluster_host") @@ -131,19 +101,12 @@ def maybe_create_worker_directories(dask_worker): config["40GB_workers"] = worker_counts.get("40GB", 0) config["80GB_workers"] = worker_counts.get("80GB", 0) - bc = None - if create_blazing_context: - from blazingsql import BlazingContext - bc = BlazingContext( - dask_client=client, - pool=os.environ.get("BLAZING_POOL", False), - network_interface=os.environ.get("INTERFACE", "ib0"), - config_options=get_bsql_config_options(), - allocator=os.environ.get("BLAZING_ALLOCATOR_MODE", "existing"), - initial_pool_size=os.environ.get("BLAZING_INITIAL_POOL_SIZE", None) - ) + c = None + if create_sql_context: + from dask_sql import Context + c = Context() - return client, bc + return client, c def worker_count_info(client): @@ -173,7 +136,7 @@ def _get_ucx_config(): Get a subset of ucx config variables relevant for benchmarking """ relevant_configs = ["infiniband", "nvlink"] - ucx_config = dask.config.get("ucx") + ucx_config = dask.config.get("distributed.comm.ucx") # Doing this since when relevant configs are not enabled the value is `None` instead of `False` filtered_ucx_config = { config: ucx_config.get(config) if ucx_config.get(config) else False @@ -196,11 +159,5 @@ def import_query_libs(): "spacy", ] - # optionally include blazingsql - # this is brittle, but it resolves breaking change - # issues as we can control the environment - if os.environ.get("RUNNER_INCLUDE_BSQL"): - library_list.append("blazingsql") - for lib in 
library_list: importlib.import_module(lib) diff --git a/gpu_bdb/bdb_tools/dasktasklogger.py b/gpu_bdb/bdb_tools/dasktasklogger.py new file mode 100644 index 00000000..fcb4b9bb --- /dev/null +++ b/gpu_bdb/bdb_tools/dasktasklogger.py @@ -0,0 +1,20 @@ +import re +import os +import json +import numpy as np + +class DaskTaskLogger(): + key_expr=re.compile( '([\w-]+)-([0-9a-f-]{32,36})' ) + + def __init__(self, client, outputdir='/tmp'): + self._client=client + self._outputdir=outputdir + + def mark_begin( self ): + self._client.get_task_stream() + + def save_tasks( self, prefix='dask' ): + plotfname=os.path.join(self._outputdir, f"{prefix}_plot.html") + pdata, pfigure = self._client.get_task_stream(plot='save', filename=plotfname) + with open( os.path.join(self._outputdir, f"{prefix}_tasks.json"), 'w') as outf: + json.dump([{k:t[k] for k in filter( lambda x: type(t[x]) != bytes().__class__, t)} for t in pdata],outf) diff --git a/gpu_bdb/bdb_tools/q01_utils.py b/gpu_bdb/bdb_tools/q01_utils.py new file mode 100644 index 00000000..471b96f8 --- /dev/null +++ b/gpu_bdb/bdb_tools/q01_utils.py @@ -0,0 +1,47 @@ +# +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +from bdb_tools.readers import build_reader + +# -------- Q1 ----------- +q01_i_category_id_IN = 1, 2, 3 +# -- sf1 -> 11 stores, 90k sales in 820k lines +q01_ss_store_sk_IN = 10, 20, 33, 40, 50 +q01_viewed_together_count = 50 +q01_limit = 100 + + +item_cols = ["i_item_sk", "i_category_id"] +ss_cols = ["ss_item_sk", "ss_store_sk", "ss_ticket_number"] + + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + item_df = table_reader.read("item", relevant_cols=item_cols) + ss_df = table_reader.read("store_sales", relevant_cols=ss_cols) + + if c: + c.create_table("item", item_df, persist=False) + c.create_table("store_sales", ss_df, persist=False) + + return item_df, ss_df + diff --git a/gpu_bdb/bdb_tools/q02_utils.py b/gpu_bdb/bdb_tools/q02_utils.py new file mode 100644 index 00000000..b9f058a6 --- /dev/null +++ b/gpu_bdb/bdb_tools/q02_utils.py @@ -0,0 +1,38 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.readers import build_reader + +q02_item_sk = 10001 +q02_limit = 30 +q02_session_timeout_inSec = 3600 +q02_MAX_ITEMS_PER_BASKET = 5000000 + + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + wcs_cols = ["wcs_user_sk", "wcs_item_sk", "wcs_click_date_sk", "wcs_click_time_sk"] + wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) + + if c: + c.create_table("web_clickstreams", wcs_df, persist=False) + + return wcs_df + diff --git a/gpu_bdb/bdb_tools/q03_utils.py b/gpu_bdb/bdb_tools/q03_utils.py new file mode 100644 index 00000000..1a4fb387 --- /dev/null +++ b/gpu_bdb/bdb_tools/q03_utils.py @@ -0,0 +1,138 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import cudf + +from numba import cuda + +from bdb_tools.readers import build_reader + +q03_days_in_sec_before_purchase = 864000 +q03_views_before_purchase = 5 +q03_purchased_item_IN = 10001 +q03_purchased_item_category_IN = 2, 3 +q03_limit = 100 + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + item_cols = ["i_category_id", "i_item_sk"] + wcs_cols = [ + "wcs_user_sk", + "wcs_click_time_sk", + "wcs_click_date_sk", + "wcs_item_sk", + "wcs_sales_sk", + ] + + item_df = table_reader.read("item", relevant_cols=item_cols) + wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) + + if c: + c.create_table("web_clickstreams", wcs_df, persist=False) + c.create_table("item", item_df, persist=False) + + return item_df + + +@cuda.jit +def find_items_viewed_before_purchase_kernel( + relevant_idx_col, user_col, timestamp_col, item_col, out_col, N +): + """ + Find the past N items viewed before a relevant purchase was made, + as defined by the configuration of this query. 
+ """ + i = cuda.grid(1) + + if i < (relevant_idx_col.size): # boundary guard + # every relevant row gets N rows in the output, so we need to map the indexes + # back into their position in the original array + orig_idx = relevant_idx_col[i] + current_user = user_col[orig_idx] + + # look at the previous N clicks (assume sorted descending) + rows_to_check = N + remaining_rows = user_col.size - orig_idx + + if remaining_rows <= rows_to_check: + rows_to_check = remaining_rows - 1 + + for k in range(1, rows_to_check + 1): + if current_user != user_col[orig_idx + k]: + out_col[i * N + k - 1] = 0 + + # only checking relevant purchases via the relevant_idx_col + elif (timestamp_col[orig_idx + k] <= timestamp_col[orig_idx]) & ( + timestamp_col[orig_idx + k] + >= (timestamp_col[orig_idx] - q03_days_in_sec_before_purchase) + ): + out_col[i * N + k - 1] = item_col[orig_idx + k] + else: + out_col[i * N + k - 1] = 0 + + +def apply_find_items_viewed(df, item_mappings): + + # need to sort descending to ensure that the + # next N rows are the previous N clicks + df = df.sort_values( + by=["wcs_user_sk", "tstamp", "wcs_sales_sk", "wcs_item_sk"], + ascending=[False, False, False, False], + ) + df.reset_index(drop=True, inplace=True) + df["relevant_flag"] = (df.wcs_sales_sk != 0) & ( + df.wcs_item_sk == q03_purchased_item_IN + ) + df["relevant_idx_pos"] = df.index.to_series() + df.reset_index(drop=True, inplace=True) + # only allocate output for the relevant rows + sample = df.loc[df.relevant_flag == True] + sample.reset_index(drop=True, inplace=True) + + N = q03_views_before_purchase + size = len(sample) + + # we know this can be int32, since it's going to contain item_sks + out_arr = cuda.device_array(size * N, dtype=df["wcs_item_sk"].dtype) + + find_items_viewed_before_purchase_kernel.forall(size)( + sample["relevant_idx_pos"], + df["wcs_user_sk"], + df["tstamp"], + df["wcs_item_sk"], + out_arr, + N, + ) + + result = cudf.DataFrame({"prior_item_viewed": out_arr}) + + del out_arr + 
del df + del sample + + filtered = result.merge( + item_mappings, + how="inner", + left_on=["prior_item_viewed"], + right_on=["i_item_sk"], + ) + return filtered + diff --git a/gpu_bdb/bdb_tools/q04_utils.py b/gpu_bdb/bdb_tools/q04_utils.py new file mode 100644 index 00000000..b848f840 --- /dev/null +++ b/gpu_bdb/bdb_tools/q04_utils.py @@ -0,0 +1,96 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cudf + +from bdb_tools.sessionization import get_sessions + +from bdb_tools.readers import build_reader + + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + wp_cols = ["wp_type", "wp_web_page_sk"] + wp_df = table_reader.read("web_page", relevant_cols=wp_cols) + + wcs_cols = [ + "wcs_user_sk", + "wcs_click_date_sk", + "wcs_click_time_sk", + "wcs_web_page_sk", + "wcs_sales_sk", + ] + wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) + + if c: + c.create_table('web_page_wo_categorical', wp_df, persist=False) + c.create_table('web_clickstreams', wcs_df, persist=False) + + return wp_df, wcs_df + + +def abandonedShoppingCarts(df, DYNAMIC_CAT_CODE, ORDER_CAT_CODE): + + # Select groups where last dynamic row comes after last order row + filtered_df = df[ + (df["wp_type_codes"] == ORDER_CAT_CODE) + | (df["wp_type_codes"] == DYNAMIC_CAT_CODE) + ] + # Create a new 
column that is the concatenation of timestamp and wp_type_codes + # (eg:123456:3, 234567:5) + filtered_df["wp_type_codes"] = ( + filtered_df["tstamp_inSec"] + .astype("str") + .str.cat(filtered_df["wp_type_codes"].astype("str"), sep=":") + ) + # This gives the last occurrence (by timestamp) within the "order", "dynamic" wp_types + filtered_df = filtered_df.groupby( + ["wcs_user_sk", "session_id"], as_index=False, sort=False + ).agg({"wp_type_codes": "max"}) + # If the max contains dynamic, keep the row else discard. + last_dynamic_df = filtered_df[ + filtered_df["wp_type_codes"].str.contains( + ":" + str(DYNAMIC_CAT_CODE), regex=False + ) + ] + del filtered_df + + # Find counts for each group + grouped_count_df = df.groupby( + ["wcs_user_sk", "session_id"], as_index=False, sort=False + ).agg({"tstamp_inSec": "count"}) + # Merge counts with the "dynamic" shopping cart groups + result = last_dynamic_df.merge( + grouped_count_df, on=["wcs_user_sk", "session_id"], how="inner" + ) + del (last_dynamic_df, grouped_count_df) + return cudf.DataFrame( + {"pagecount": result.tstamp_inSec.sum(), "count": len(result)} + ) + + +def reduction_function(df, keep_cols, DYNAMIC_CAT_CODE, ORDER_CAT_CODE): + df = get_sessions(df, keep_cols=keep_cols) + df = abandonedShoppingCarts( + df, DYNAMIC_CAT_CODE=DYNAMIC_CAT_CODE, ORDER_CAT_CODE=ORDER_CAT_CODE + ) + return df + diff --git a/gpu_bdb/bdb_tools/q05_utils.py b/gpu_bdb/bdb_tools/q05_utils.py new file mode 100644 index 00000000..c4b71fd0 --- /dev/null +++ b/gpu_bdb/bdb_tools/q05_utils.py @@ -0,0 +1,104 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cupy as cp + +import cuml +from cuml.metrics import confusion_matrix + +from bdb_tools.cupy_metrics import cupy_precision_score + +from bdb_tools.readers import build_reader + +from sklearn.metrics import roc_auc_score + +# Logistic Regression params +# solver = "LBFGS" Used by passing `penalty=None` or "l2" +# step_size = 1 Not used +# numCorrections = 10 Not used +iterations = 100 +C = 10_000 # reg_lambda = 0 hence C for model is a large value +convergence_tol = 1e-9 + +wcs_columns = ["wcs_item_sk", "wcs_user_sk"] +items_columns = ["i_item_sk", "i_category", "i_category_id"] +customer_columns = ["c_customer_sk", "c_current_cdemo_sk"] +customer_dem_columns = ["cd_demo_sk", "cd_gender", "cd_education_status"] + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + item_ddf = table_reader.read("item", relevant_cols=items_columns, index=False) + customer_ddf = table_reader.read( + "customer", relevant_cols=customer_columns, index=False + ) + customer_dem_ddf = table_reader.read( + "customer_demographics", relevant_cols=customer_dem_columns, index=False + ) + wcs_ddf = table_reader.read( + "web_clickstreams", relevant_cols=wcs_columns, index=False + ) + + if c: + c.create_table("web_clickstreams", wcs_ddf, persist=False) + c.create_table("customer", customer_ddf, persist=False) + c.create_table("item", item_ddf, persist=False) + c.create_table("customer_demographics", customer_dem_ddf, persist=False) + + return 
(item_ddf, customer_ddf, customer_dem_ddf) + +def build_and_predict_model(ml_input_df): + """ + Create a standardized feature matrix X and target array y. + Returns the model and accuracy statistics + """ + + feature_names = ["college_education", "male"] + [ + "clicks_in_%d" % i for i in range(1, 8) + ] + X = ml_input_df[feature_names] + # Standardize input matrix + X = (X - X.mean()) / X.std() + y = ml_input_df["clicks_in_category"] + + model = cuml.LogisticRegression( + tol=convergence_tol, + penalty="none", + solver="qn", + fit_intercept=True, + max_iter=iterations, + C=C, + ) + model.fit(X, y) + # + # Predict and evaluate accuracy + # (Should be 1.0) at SF-1 + # + results_dict = {} + y_pred = model.predict(X) + + results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array()) + results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred)) + results_dict["confusion_matrix"] = confusion_matrix( + cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32") + ) + results_dict["output_type"] = "supervised" + return results_dict + diff --git a/gpu_bdb/bdb_tools/q06_utils.py b/gpu_bdb/bdb_tools/q06_utils.py new file mode 100644 index 00000000..ec4e02b3 --- /dev/null +++ b/gpu_bdb/bdb_tools/q06_utils.py @@ -0,0 +1,72 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.readers import build_reader + +# -------- Q6 ----------- +q06_LIMIT = 100 +# --web_sales and store_sales date +q06_YEAR = 2001 + + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + web_sales_cols = [ + "ws_bill_customer_sk", + "ws_sold_date_sk", + "ws_ext_list_price", + "ws_ext_wholesale_cost", + "ws_ext_discount_amt", + "ws_ext_sales_price", + ] + store_sales_cols = [ + "ss_customer_sk", + "ss_sold_date_sk", + "ss_ext_list_price", + "ss_ext_wholesale_cost", + "ss_ext_discount_amt", + "ss_ext_sales_price", + ] + date_cols = ["d_date_sk", "d_year", "d_moy"] + customer_cols = [ + "c_customer_sk", + "c_customer_id", + "c_email_address", + "c_first_name", + "c_last_name", + "c_preferred_cust_flag", + "c_birth_country", + "c_login", + ] + + ws_df = table_reader.read("web_sales", relevant_cols=web_sales_cols) + ss_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) + date_df = table_reader.read("date_dim", relevant_cols=date_cols) + customer_df = table_reader.read("customer", relevant_cols=customer_cols) + + if c: + c.create_table('web_sales', ws_df, persist=False) + c.create_table('store_sales', ss_df, persist=False) + c.create_table('date_dim', date_df, persist=False) + c.create_table('customer', customer_df, persist=False) + + return (ws_df, ss_df, date_df, customer_df) + diff --git a/gpu_bdb/bdb_tools/q07_utils.py b/gpu_bdb/bdb_tools/q07_utils.py new file mode 100644 index 00000000..e55b54f1 --- /dev/null +++ b/gpu_bdb/bdb_tools/q07_utils.py @@ -0,0 +1,55 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.readers import build_reader + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + item_cols = ["i_item_sk", "i_current_price", "i_category"] + store_sales_cols = ["ss_item_sk", "ss_customer_sk", "ss_sold_date_sk"] + date_cols = ["d_date_sk", "d_year", "d_moy"] + customer_cols = ["c_customer_sk", "c_current_addr_sk"] + customer_address_cols = ["ca_address_sk", "ca_state"] + + item_df = table_reader.read("item", relevant_cols=item_cols) + store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) + date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) + customer_df = table_reader.read("customer", relevant_cols=customer_cols) + customer_address_df = table_reader.read( + "customer_address", relevant_cols=customer_address_cols + ) + + if c: + c.create_table("item", item_df, persist=False) + c.create_table("customer", customer_df, persist=False) + c.create_table("store_sales", store_sales_df, persist=False) + c.create_table("date_dim", date_dim_df, persist=False) + c.create_table("customer_address", customer_address_df, persist=False) + + return ( + item_df, + store_sales_df, + date_dim_df, + customer_df, + customer_address_df, + ) + + diff --git a/gpu_bdb/bdb_tools/q08_utils.py b/gpu_bdb/bdb_tools/q08_utils.py new file mode 100644 index 00000000..2a220cb1 --- /dev/null +++ b/gpu_bdb/bdb_tools/q08_utils.py @@ -0,0 +1,158 @@ +# +# Copyright (c) 2019-2022, NVIDIA 
CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cudf + +import cupy as cp +import numpy as np + +from bdb_tools.readers import build_reader + +q08_STARTDATE = "2001-09-02" +q08_ENDDATE = "2002-09-02" +q08_SECONDS_BEFORE_PURCHASE = 259200 +NA_FLAG = 0 + +def read_tables(config, c=None): + table_reader = build_reader( + data_format=config["file_format"], + basepath=config["data_dir"], + split_row_groups=config["split_row_groups"], + ) + + date_dim_cols = ["d_date_sk", "d_date"] + web_page_cols = ["wp_web_page_sk", "wp_type"] + web_sales_cols = ["ws_net_paid", "ws_order_number", "ws_sold_date_sk"] + wcs_cols = [ + "wcs_user_sk", + "wcs_sales_sk", + "wcs_click_date_sk", + "wcs_click_time_sk", + "wcs_web_page_sk", + ] + + date_dim_df = table_reader.read("date_dim", relevant_cols=date_dim_cols) + web_page_df = table_reader.read("web_page", relevant_cols=web_page_cols) + web_sales_df = table_reader.read("web_sales", relevant_cols=web_sales_cols) + wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) + + if c: + c.create_table("web_clickstreams", wcs_df, persist=False) + c.create_table("web_sales", web_sales_df, persist=False) + c.create_table("web_page", web_page_df, persist=False) + c.create_table("date_dim", date_dim_df, persist=False) + + return (date_dim_df, web_page_df, web_sales_df) + +def get_session_id_from_session_boundary(session_change_df, last_session_len): + """ + This function returns session starts given a session 
change df + """ + + user_session_ids = session_change_df.tstamp_inSec + + ### up shift the session length df + session_len = session_change_df["t_index"].diff().reset_index(drop=True) + session_len = session_len.shift(-1) + + try: + session_len.iloc[-1] = last_session_len + except (AssertionError, IndexError): # IndexError in numba >= 0.48 + session_len = cudf.Series([]) + + session_id_final_series = ( + cudf.Series(user_session_ids).repeat(session_len).reset_index(drop=True) + ) + return session_id_final_series + + +def get_session_id(df): + """ + This function creates a session id column for each click + The session id grows in increments for each user's subsequent session + Session boundary is defined by the time_out + """ + + df["user_change_flag"] = df["wcs_user_sk"].diff(periods=1) != 0 + df["user_change_flag"] = df["user_change_flag"].fillna(True) + df["session_change_flag"] = df["review_flag"] | df["user_change_flag"] + + df = df.reset_index(drop=True) + df["t_index"] = cp.arange(start=0, stop=len(df), dtype=np.int32) + + session_change_df = df[df["session_change_flag"]].reset_index(drop=True) + try: + last_session_len = len(df) - session_change_df["t_index"].iloc[-1] + except (AssertionError, IndexError): # IndexError in numba >= 0.48 + last_session_len = 0 + + session_ids = get_session_id_from_session_boundary( + session_change_df, last_session_len + ) + + assert len(session_ids) == len(df) + return session_ids + + +def get_sessions(df): + df = df.sort_values( + by=["wcs_user_sk", "tstamp_inSec", "wcs_sales_sk", "wp_type_codes"] + ).reset_index(drop=True) + df["session_id"] = get_session_id(df) + return df + + +def get_unique_sales_keys_from_sessions(sessionized, review_cat_code): + sessionized["relevant"] = ( + (sessionized.tstamp_inSec - sessionized.session_id) + <= q08_SECONDS_BEFORE_PURCHASE + ) & (sessionized.wcs_sales_sk != NA_FLAG) + unique_sales_sk = ( + sessionized.query(f"wcs_sales_sk != {NA_FLAG}") + .query("relevant == True") + 
.query(f"wp_type_codes != {review_cat_code}") + .wcs_sales_sk.unique() + ) + + return unique_sales_sk + + +def prep_for_sessionization(df, review_cat_code): + df = df.fillna(NA_FLAG) + df = df.sort_values( + by=["wcs_user_sk", "tstamp_inSec", "wcs_sales_sk", "wp_type_codes"] + ).reset_index(drop=True) + + review_df = df.loc[df["wp_type_codes"] == review_cat_code] + # per user, the index of the first review + # need this to decide if a review was "recent enough" + every_users_first_review = ( + review_df[["wcs_user_sk", "tstamp_inSec"]] + .drop_duplicates() + .reset_index() + .groupby("wcs_user_sk")["index"] + .min() + .reset_index() + ) + every_users_first_review.columns = ["wcs_user_sk", "first_review_index"] + + # then reset the index to keep the old index before parallel join + df_merged = df.reset_index().merge( + every_users_first_review, how="left", on="wcs_user_sk" + ) + df_filtered = df_merged.query("index >= first_review_index") + return df_filtered + diff --git a/gpu_bdb/bdb_tools/q09_utils.py b/gpu_bdb/bdb_tools/q09_utils.py new file mode 100644 index 00000000..42fce78d --- /dev/null +++ b/gpu_bdb/bdb_tools/q09_utils.py @@ -0,0 +1,91 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ===== gpu_bdb/bdb_tools/q09_utils.py =====

from bdb_tools.readers import build_reader


# -------- Q9 -----------
q09_year = 2001

q09_part1_ca_country = "United States"
q09_part1_ca_state_IN = ("KY", "GA", "NM")
q09_part1_net_profit_min = 0
q09_part1_net_profit_max = 2000
q09_part1_education_status = "4 yr Degree"
q09_part1_marital_status = "M"
q09_part1_sales_price_min = 100
q09_part1_sales_price_max = 150

q09_part2_ca_country = "United States"
q09_part2_ca_state_IN = ("MT", "OR", "IN")
q09_part2_net_profit_min = 150
q09_part2_net_profit_max = 3000
q09_part2_education_status = "4 yr Degree"
q09_part2_marital_status = "M"
q09_part2_sales_price_min = 50
q09_part2_sales_price_max = 200

q09_part3_ca_country = "United States"
q09_part3_ca_state_IN = ("WI", "MO", "WV")
q09_part3_net_profit_min = 50
q09_part3_net_profit_max = 25000
q09_part3_education_status = "4 yr Degree"
q09_part3_marital_status = "M"
q09_part3_sales_price_min = 150
q09_part3_sales_price_max = 200


def read_tables(config, c=None):
    """Read the five tables used by query 9.

    Parameters
    ----------
    config : dict
        Benchmark configuration; ``file_format``, ``data_dir`` and
        ``split_row_groups`` are consumed here.
    c : optional SQL context
        When given, every frame is also registered as a non-persisted
        SQL table.

    Returns
    -------
    tuple
        (store_sales, customer_address, customer_demographics,
        date_dim, store)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    store_sales = reader.read(
        "store_sales",
        relevant_cols=[
            "ss_quantity",
            "ss_sold_date_sk",
            "ss_addr_sk",
            "ss_store_sk",
            "ss_cdemo_sk",
            "ss_sales_price",
            "ss_net_profit",
        ],
    )
    customer_address = reader.read(
        "customer_address",
        relevant_cols=["ca_address_sk", "ca_country", "ca_state"],
    )
    customer_demographics = reader.read(
        "customer_demographics",
        relevant_cols=["cd_demo_sk", "cd_marital_status", "cd_education_status"],
    )
    date_dim = reader.read("date_dim", relevant_cols=["d_year", "d_date_sk"])
    store = reader.read("store", relevant_cols=["s_store_sk"])

    if c:
        for name, frame in (
            ("store_sales", store_sales),
            ("customer_address", customer_address),
            ("customer_demographics", customer_demographics),
            ("date_dim", date_dim),
            ("store", store),
        ):
            c.create_table(name, frame, persist=False)

    return (store_sales, customer_address, customer_demographics, date_dim, store)


# ===== gpu_bdb/bdb_tools/q10_utils.py =====
# (separate module in the original patch; name collision on read_tables
# exists only in this flattened view)

eol_char = "è"


def read_tables(config, c=None):
    """Read product_reviews for query 10.

    Reviews carry long text columns, so the reader splits by row group
    for better parallelism regardless of config["split_row_groups"].
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=True,
    )

    product_reviews_df = reader.read(
        "product_reviews",
        relevant_cols=["pr_item_sk", "pr_review_content", "pr_review_sk"],
    )

    if c:
        c.create_table("product_reviews", product_reviews_df, persist=False)

    return product_reviews_df
# ===== gpu_bdb/bdb_tools/q11_utils.py =====

from bdb_tools.readers import build_reader


def read_tables(config, c=None):
    """Read product_reviews, web_sales and date_dim for query 11.

    Returns
    -------
    tuple
        (pr_df, ws_df, date_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    pr_df = reader.read(
        "product_reviews",
        relevant_cols=["pr_review_rating", "pr_item_sk"],
    )
    # Only integer columns are read here, so even at sf-10k this is just
    # ~26M rows and safely fits a single partition.
    pr_df = pr_df.repartition(npartitions=1)

    ws_df = reader.read(
        "web_sales",
        relevant_cols=["ws_sold_date_sk", "ws_net_paid", "ws_item_sk"],
    )
    date_df = reader.read("date_dim", relevant_cols=["d_date_sk", "d_date"])

    if c:
        for name, frame in (
            ("web_sales", ws_df),
            ("product_reviews", pr_df),
            ("date_dim", date_df),
        ):
            c.create_table(name, frame, persist=False)

    return (pr_df, ws_df, date_df)


# ===== gpu_bdb/bdb_tools/q12_utils.py =====
# (separate module in the original patch)

q12_i_category_IN = "'Books', 'Electronics'"

# Column selections shared with the q12 query code.
item_cols = ["i_item_sk", "i_category"]
store_sales_cols = ["ss_item_sk", "ss_sold_date_sk", "ss_customer_sk"]
wcs_cols = ["wcs_user_sk", "wcs_click_date_sk", "wcs_item_sk", "wcs_sales_sk"]


def read_tables(config, c=None):
    """Read item, store_sales and web_clickstreams for query 12.

    Returns
    -------
    tuple
        (item_df, store_sales_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    item_df = reader.read("item", relevant_cols=item_cols)
    store_sales_df = reader.read("store_sales", relevant_cols=store_sales_cols)
    wcs_df = reader.read("web_clickstreams", relevant_cols=wcs_cols)

    if c:
        c.create_table("web_clickstreams", wcs_df, persist=False)
        c.create_table("store_sales", store_sales_df, persist=False)
        c.create_table("item", item_df, persist=False)

    # NOTE(review): wcs_df is registered for the SQL path but deliberately
    # not returned -- presumably the dask query reads web_clickstreams
    # itself with its own reader; confirm against the q12 query before
    # changing the return arity.
    return item_df, store_sales_df
# ===== gpu_bdb/bdb_tools/q13_utils.py =====

from bdb_tools.readers import build_reader


def read_tables(config, c=None):
    """Read date_dim, customer, store_sales and web_sales for query 13.

    Returns
    -------
    tuple
        (date_dim_df, customer_df, s_sales_df, web_sales_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    date_dim_df = reader.read("date_dim", relevant_cols=["d_date_sk", "d_year"])
    customer_df = reader.read(
        "customer",
        relevant_cols=["c_customer_sk", "c_customer_id", "c_first_name", "c_last_name"],
    )
    s_sales_df = reader.read(
        "store_sales",
        relevant_cols=["ss_sold_date_sk", "ss_customer_sk", "ss_net_paid"],
    )
    web_sales_df = reader.read(
        "web_sales",
        relevant_cols=["ws_sold_date_sk", "ws_bill_customer_sk", "ws_net_paid"],
    )

    if c:
        for name, frame in (
            ("date_dim", date_dim_df),
            ("customer", customer_df),
            ("store_sales", s_sales_df),
            ("web_sales", web_sales_df),
        ):
            c.create_table(name, frame, persist=False)

    return (date_dim_df, customer_df, s_sales_df, web_sales_df)
# ===== gpu_bdb/bdb_tools/q14_utils.py =====

from bdb_tools.readers import build_reader


def read_tables(config, c=None):
    """Read web_sales, household_demographics, web_page and time_dim
    for query 14.

    Returns
    -------
    tuple
        (web_sales, household_demographics, web_page, time_dim)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    web_sales = reader.read(
        "web_sales",
        relevant_cols=["ws_ship_hdemo_sk", "ws_web_page_sk", "ws_sold_time_sk"],
    )
    household_demographics = reader.read(
        "household_demographics",
        relevant_cols=["hd_demo_sk", "hd_dep_count"],
    )
    web_page = reader.read(
        "web_page", relevant_cols=["wp_web_page_sk", "wp_char_count"]
    )
    time_dim = reader.read("time_dim", relevant_cols=["t_time_sk", "t_hour"])

    if c:
        for name, frame in (
            ("household_demographics", household_demographics),
            ("web_page", web_page),
            ("web_sales", web_sales),
            ("time_dim", time_dim),
        ):
            c.create_table(name, frame, persist=False)

    return (web_sales, household_demographics, web_page, time_dim)
# ===== gpu_bdb/bdb_tools/q15_utils.py =====

from bdb_tools.readers import build_reader

# --store_sales date range
q15_startDate = "2001-09-02"
# --+1year
q15_endDate = "2002-09-02"
q15_store_sk = 10

# Column selections shared with the q15 query code.
store_sales_cols = ["ss_sold_date_sk", "ss_net_paid", "ss_store_sk", "ss_item_sk"]
date_cols = ["d_date", "d_date_sk"]
item_cols = ["i_item_sk", "i_category_id"]


def read_tables(config, c=None):
    """Read store_sales, date_dim and item for query 15.

    Returns
    -------
    tuple
        (store_sales_df, date_dim_df, item_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    store_sales_df = reader.read("store_sales", relevant_cols=store_sales_cols)
    date_dim_df = reader.read("date_dim", relevant_cols=date_cols)
    item_df = reader.read("item", relevant_cols=item_cols)

    if c:
        for name, frame in (
            ("store_sales", store_sales_df),
            ("date_dim", date_dim_df),
            ("item", item_df),
        ):
            c.create_table(name, frame, persist=False)

    return store_sales_df, date_dim_df, item_df
# ===== gpu_bdb/bdb_tools/q16_utils.py =====

from bdb_tools.readers import build_reader

# Column selections shared with the q16 query code.
websale_cols = [
    "ws_order_number",
    "ws_item_sk",
    "ws_warehouse_sk",
    "ws_sold_date_sk",
    "ws_sales_price",
]
web_returns_cols = ["wr_order_number", "wr_item_sk", "wr_refunded_cash"]
date_cols = ["d_date", "d_date_sk"]
item_cols = ["i_item_sk", "i_item_id"]
warehouse_cols = ["w_warehouse_sk", "w_state"]


def read_tables(config, c=None):
    """Read the five tables used by query 16.

    Returns
    -------
    tuple
        (web_sales_df, web_returns_df, date_dim_df, item_df, warehouse_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    web_sales_df = reader.read("web_sales", relevant_cols=websale_cols)
    web_returns_df = reader.read("web_returns", relevant_cols=web_returns_cols)
    date_dim_df = reader.read("date_dim", relevant_cols=date_cols)
    item_df = reader.read("item", relevant_cols=item_cols)
    warehouse_df = reader.read("warehouse", relevant_cols=warehouse_cols)

    if c:
        for name, frame in (
            ("web_sales", web_sales_df),
            ("web_returns", web_returns_df),
            ("date_dim", date_dim_df),
            ("item", item_df),
            ("warehouse", warehouse_df),
        ):
            c.create_table(name, frame, persist=False)

    return web_sales_df, web_returns_df, date_dim_df, item_df, warehouse_df
# ===== gpu_bdb/bdb_tools/q17_utils.py =====

from bdb_tools.readers import build_reader

q17_gmt_offset = -5.0
# --store_sales date
q17_year = 2001
q17_month = 12

# Column selections shared with the q17 query code.
store_sales_cols = [
    "ss_ext_sales_price",
    "ss_sold_date_sk",
    "ss_store_sk",
    "ss_customer_sk",
    "ss_promo_sk",
    "ss_item_sk",
]
item_cols = ["i_category", "i_item_sk"]
customer_cols = ["c_customer_sk", "c_current_addr_sk"]
store_cols = ["s_gmt_offset", "s_store_sk"]
date_cols = ["d_date_sk", "d_year", "d_moy"]
customer_address_cols = ["ca_address_sk", "ca_gmt_offset"]
promotion_cols = ["p_channel_email", "p_channel_dmail", "p_channel_tv", "p_promo_sk"]


def read_tables(config, c=None):
    """Read the seven tables used by query 17.

    Returns
    -------
    tuple
        (store_sales_df, item_df, customer_df, store_df, date_dim_df,
        customer_address_df, promotion_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    store_sales_df = reader.read("store_sales", relevant_cols=store_sales_cols)
    item_df = reader.read("item", relevant_cols=item_cols)
    customer_df = reader.read("customer", relevant_cols=customer_cols)
    store_df = reader.read("store", relevant_cols=store_cols)
    date_dim_df = reader.read("date_dim", relevant_cols=date_cols)
    customer_address_df = reader.read(
        "customer_address", relevant_cols=customer_address_cols
    )
    promotion_df = reader.read("promotion", relevant_cols=promotion_cols)

    if c:
        for name, frame in (
            ("store_sales", store_sales_df),
            ("item", item_df),
            ("customer", customer_df),
            ("store", store_df),
            ("date_dim", date_dim_df),
            ("customer_address", customer_address_df),
            ("promotion", promotion_df),
        ):
            c.create_table(name, frame, persist=False)

    return (
        store_sales_df,
        item_df,
        customer_df,
        store_df,
        date_dim_df,
        customer_address_df,
        promotion_df,
    )
# ===== gpu_bdb/bdb_tools/q18_utils.py =====

import cupy as cp
import cudf
# NOTE(review): cudf._lib.strings is a private cuDF namespace; this
# import can break across cuDF releases -- confirm a public equivalent.
from cudf._lib.strings import find_multiple

from bdb_tools.readers import build_reader

q18_startDate = "2001-05-02"
# --+90days
q18_endDate = "2001-09-02"

EOL_CHAR = "è"


def read_tables(config, c=None):
    """Read q18 inputs; product_reviews is split by row group.

    Returns
    -------
    tuple
        (store_sales, date_dim, store, product_reviews)
    """
    base_reader = build_reader(
        data_format=config["file_format"], basepath=config["data_dir"],
    )

    store_sales = base_reader.read(
        "store_sales",
        relevant_cols=["ss_store_sk", "ss_sold_date_sk", "ss_net_paid"],
    )
    date_dim = base_reader.read("date_dim", relevant_cols=["d_date_sk", "d_date"])
    store = base_reader.read("store", relevant_cols=["s_store_sk", "s_store_name"])

    # Reviews carry long text: split by row groups for better parallelism.
    review_reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=True,
    )
    product_reviews = review_reader.read(
        "product_reviews",
        relevant_cols=["pr_review_date", "pr_review_content", "pr_review_sk"],
    )

    if c:
        for name, frame in (
            ("store", store),
            ("store_sales", store_sales),
            ("date_dim", date_dim),
            ("product_reviews", product_reviews),
        ):
            c.create_table(name, frame, persist=False)

    return store_sales, date_dim, store, product_reviews


def create_found_reshaped_with_global_pos(found, targets):
    """Explode the hit matrix from find_targets_in_reviews_helper.

    Each nonzero (row, column) cell becomes its own output row holding
    the matched word and the pr_review_sk of the review it came from.
    Kept separate from the matching helper to simplify dask metadata
    handling.
    """
    # Map each target word to its column position in the hit matrix.
    word_index = cudf.DataFrame({"word": targets}).reset_index(drop=False)
    word_index.columns = ["word_mapping", "word"]

    hits_only = found.drop(["pr_review_sk"], axis=1)
    row_idxs, col_idxs = hits_only.values.nonzero()

    exploded = cudf.DataFrame(
        {"word_mapping": col_idxs, "pr_review_sk": found["pr_review_sk"].iloc[row_idxs]}
    )
    return exploded.merge(word_index, on="word_mapping", how="inner")[
        ["word", "pr_review_sk"]
    ]


def find_targets_in_reviews_helper(ddf, targets, str_col_name="pr_review_content"):
    """Return matched (word, pr_review_sk) pairs for one partition.

    Builds an N x K matrix (N rows in ddf, K target words) whose cells
    are nonzero where the target occurs in the review text; rows with
    at least one hit are kept and exploded.
    """
    content_lower = ddf[str_col_name].str.lower()

    ## TODO: Do the replace/any in cupy land before going to cuDF
    hit_matrix = cudf.DataFrame(
        cp.asarray(
            find_multiple.find_multiple(content_lower._column, targets._column)
        ).reshape(-1, len(targets))
    )

    # Convert raw match values to a 0/1-style indicator: 0 -> 1, -1 -> 0.
    # NOTE(review): presumably find_multiple returns character offsets
    # with -1 meaning "no match"; other positive offsets stay truthy --
    # confirm against the cuDF version in use.
    hit_matrix = hit_matrix.replace([0, -1], [1, 0])
    any_hit = hit_matrix.any(axis=1)
    hit_matrix["pr_review_sk"] = ddf["pr_review_sk"]
    matched_rows = hit_matrix.loc[any_hit]
    return create_found_reshaped_with_global_pos(matched_rows, targets)


def find_relevant_reviews(df, targets, str_col_name="pr_review_content"):
    """Find the reviews containing the target stores and return them
    joined with their review dates."""
    targets = cudf.Series(targets)
    targets_lower = targets.str.lower()
    reviews_found = find_targets_in_reviews_helper(df, targets_lower)[
        ["word", "pr_review_sk"]
    ]

    return reviews_found.merge(
        df[["pr_review_date", "pr_review_sk"]], how="inner", on=["pr_review_sk"]
    )
# ===== gpu_bdb/bdb_tools/q19_utils.py =====

from bdb_tools.readers import build_reader

q19_returns_dates_IN = ["2004-03-08", "2004-08-02", "2004-11-15", "2004-12-20"]

eol_char = "è"


def read_tables(config, c=None):
    """Read q19 inputs; product_reviews is split by row group.

    Returns
    -------
    tuple
        (date_dim_df, store_returns_df, web_returns_df, product_reviews_df)
    """
    reader = build_reader(
        data_format=config["file_format"], basepath=config["data_dir"],
    )

    date_dim_df = reader.read(
        "date_dim", relevant_cols=["d_week_seq", "d_date_sk", "d_date"]
    )
    store_returns_df = reader.read(
        "store_returns",
        relevant_cols=["sr_returned_date_sk", "sr_item_sk", "sr_return_quantity"],
    )
    web_returns_df = reader.read(
        "web_returns",
        relevant_cols=["wr_returned_date_sk", "wr_item_sk", "wr_return_quantity"],
    )

    # Reviews carry long text: split by row groups for better parallelism.
    review_reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=True,
    )
    product_reviews_df = review_reader.read(
        "product_reviews",
        relevant_cols=["pr_item_sk", "pr_review_content", "pr_review_sk"],
    )

    if c:
        for name, frame in (
            ("web_returns", web_returns_df),
            ("date_dim", date_dim_df),
            ("product_reviews", product_reviews_df),
            ("store_returns", store_returns_df),
        ):
            c.create_table(name, frame, persist=False)

    return date_dim_df, store_returns_df, web_returns_df, product_reviews_df


# ===== gpu_bdb/bdb_tools/q20_utils.py =====
# (separate module in the original patch)

import dask_cudf

from dask import delayed

from bdb_tools.utils import train_clustering_model

# q20 parameters
N_CLUSTERS = 8
CLUSTER_ITERATIONS = 20
N_ITER = 5


def read_tables(config, c=None):
    """Read store_sales and store_returns for query 20.

    Returns
    -------
    tuple
        (store_sales_df, store_returns_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    store_sales_df = reader.read(
        "store_sales",
        relevant_cols=[
            "ss_customer_sk",
            "ss_ticket_number",
            "ss_item_sk",
            "ss_net_paid",
        ],
    )
    store_returns_df = reader.read(
        "store_returns",
        relevant_cols=[
            "sr_item_sk",
            "sr_customer_sk",
            "sr_ticket_number",
            "sr_return_amt",
        ],
    )

    if c:
        c.create_table("store_sales", store_sales_df, persist=False)
        c.create_table("store_returns", store_returns_df, persist=False)

    return store_sales_df, store_returns_df


def get_clusters(client, ml_input_df, feature_cols):
    """Cluster ml_input_df[feature_cols] and return the q20 result dict.

    Parameters
    ----------
    client
        Dask distributed client used to run the clustering tasks.
    ml_input_df
        Input frame; must carry a ``user_sk`` column.
    feature_cols
        Names of the feature columns fed to the clustering model.

    Returns
    -------
    dict
        train_clustering_model's result dict with ``cid_labels``
        replaced by a (user_sk, label) frame.
    """
    clustering_tasks = [
        delayed(train_clustering_model)(frame, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER)
        for frame in ml_input_df[feature_cols].to_delayed()
    ]

    # NOTE(review): star-unpacking assumes a single delayed task (one
    # partition); with more partitions the extra tasks would bind to
    # Client.compute's other parameters -- confirm the caller
    # repartitions to one partition.
    results_dict = client.compute(*clustering_tasks, sync=True)

    labels = results_dict["cid_labels"]

    labels_final = dask_cudf.from_cudf(labels, npartitions=ml_input_df.npartitions)
    ml_input_df["label"] = labels_final.reset_index()[0]

    results_dict["cid_labels"] = ml_input_df[["user_sk", "label"]]
    return results_dict
# ===== gpu_bdb/bdb_tools/q21_utils.py =====

from bdb_tools.readers import build_reader

# Column selections shared with the q21 query code.
store_sales_cols = [
    "ss_item_sk",
    "ss_store_sk",
    "ss_customer_sk",
    "ss_ticket_number",
    "ss_quantity",
    "ss_sold_date_sk",
]
date_cols = ["d_date_sk", "d_year", "d_moy"]
websale_cols = ["ws_item_sk", "ws_bill_customer_sk", "ws_quantity", "ws_sold_date_sk"]
sr_cols = [
    "sr_item_sk",
    "sr_customer_sk",
    "sr_ticket_number",
    "sr_return_quantity",
    "sr_returned_date_sk",
]
store_cols = ["s_store_name", "s_store_id", "s_store_sk"]
item_cols = ["i_item_id", "i_item_desc", "i_item_sk"]


def read_tables(config, c=None):
    """Read the six tables used by query 21.

    Returns
    -------
    tuple
        (store_sales_df, date_dim_df, web_sales_df, store_returns_df,
        store_table_df, item_table_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    store_sales_df = reader.read("store_sales", relevant_cols=store_sales_cols)
    date_dim_df = reader.read("date_dim", relevant_cols=date_cols)
    web_sales_df = reader.read("web_sales", relevant_cols=websale_cols)
    store_returns_df = reader.read("store_returns", relevant_cols=sr_cols)
    store_table_df = reader.read("store", relevant_cols=store_cols)
    item_table_df = reader.read("item", relevant_cols=item_cols)

    if c:
        for name, frame in (
            ("store_sales", store_sales_df),
            ("date_dim", date_dim_df),
            ("item", item_table_df),
            ("web_sales", web_sales_df),
            ("store_returns", store_returns_df),
            ("store", store_table_df),
        ):
            c.create_table(name, frame, persist=False)

    return (
        store_sales_df,
        date_dim_df,
        web_sales_df,
        store_returns_df,
        store_table_df,
        item_table_df,
    )
# ===== gpu_bdb/bdb_tools/q22_utils.py =====

from bdb_tools.readers import build_reader
from bdb_tools.utils import convert_datestring_to_days

q22_date = "2001-05-08"
q22_i_current_price_min = 0.98
q22_i_current_price_max = 1.5


def read_tables(config, c=None):
    """Read inventory, item, warehouse and date_dim for query 22.

    date_dim's date strings are converted to day numbers per partition
    before being returned/registered.

    Returns
    -------
    tuple
        (inventory, item, warehouse, date_dim)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    inventory = reader.read(
        "inventory",
        relevant_cols=[
            "inv_item_sk",
            "inv_warehouse_sk",
            "inv_date_sk",
            "inv_quantity_on_hand",
        ],
    )
    item = reader.read(
        "item", relevant_cols=["i_item_id", "i_current_price", "i_item_sk"]
    )
    warehouse = reader.read(
        "warehouse", relevant_cols=["w_warehouse_sk", "w_warehouse_name"]
    )

    date_dim = reader.read("date_dim", relevant_cols=["d_date_sk", "d_date"])
    date_dim = date_dim.map_partitions(convert_datestring_to_days)

    if c:
        for name, frame in (
            ("inventory", inventory),
            ("item", item),
            ("warehouse", warehouse),
            ("date_dim", date_dim),
        ):
            c.create_table(name, frame, persist=False)

    return inventory, item, warehouse, date_dim
# ===== gpu_bdb/bdb_tools/q23_utils.py =====

from bdb_tools.readers import build_reader

q23_year = 2001
q23_month = 1
q23_coefficient = 1.3


def read_tables(config, c=None):
    """Read date_dim and inventory for query 23.

    Returns
    -------
    tuple
        (date_df, inv_df)
    """
    reader = build_reader(
        data_format=config["file_format"], basepath=config["data_dir"],
    )

    date_df = reader.read("date_dim", relevant_cols=["d_date_sk", "d_year", "d_moy"])

    inv_df = reader.read(
        "inventory",
        relevant_cols=[
            "inv_warehouse_sk",
            "inv_item_sk",
            "inv_date_sk",
            "inv_quantity_on_hand",
        ],
    )

    if c:
        c.create_table("inventory", inv_df, persist=False)
        c.create_table("date_dim", date_df, persist=False)

    return date_df, inv_df
# ===== gpu_bdb/bdb_tools/q24_utils.py =====

from bdb_tools.readers import build_reader

# Column selections shared with the q24 query code.
ws_cols = ["ws_item_sk", "ws_sold_date_sk", "ws_quantity"]
item_cols = ["i_item_sk", "i_current_price"]
imp_cols = [
    "imp_item_sk",
    "imp_competitor_price",
    "imp_start_date",
    "imp_end_date",
    "imp_sk",
]
ss_cols = ["ss_item_sk", "ss_sold_date_sk", "ss_quantity"]


def read_tables(config, c=None):
    """Read web_sales, item, item_marketprices and store_sales for
    query 24.

    Returns
    -------
    tuple
        (ws_df, item_df, imp_df, ss_df)
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    ### read tables
    ws_df = reader.read("web_sales", relevant_cols=ws_cols)
    item_df = reader.read("item", relevant_cols=item_cols)
    imp_df = reader.read("item_marketprices", relevant_cols=imp_cols)
    ss_df = reader.read("store_sales", relevant_cols=ss_cols)

    if c:
        for name, frame in (
            ("web_sales", ws_df),
            ("item", item_df),
            ("item_marketprices", imp_df),
            ("store_sales", ss_df),
        ):
            c.create_table(name, frame, persist=False)

    return ws_df, item_df, imp_df, ss_df
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# ---------------------------------------------------------------------------
# bdb_tools/q25_utils.py
# ---------------------------------------------------------------------------

import dask_cudf

from bdb_tools.utils import train_clustering_model
from bdb_tools.readers import build_reader

from dask import delayed

# q25 reference date and the KMeans hyper-parameters used for segmentation.
q25_date = "2002-01-02"

N_CLUSTERS = 8
CLUSTER_ITERATIONS = 20
N_ITER = 5


def read_tables(config, c=None):
    """Load the tables required by query 25.

    Returns ``(ss_ddf, ws_ddf, datedim_ddf)``; when a SQL context ``c`` is
    given, the tables are also registered with it.
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    ss_ddf = reader.read(
        "store_sales",
        relevant_cols=[
            "ss_customer_sk",
            "ss_sold_date_sk",
            "ss_ticket_number",
            "ss_net_paid",
        ],
        index=False,
    )
    ws_ddf = reader.read(
        "web_sales",
        relevant_cols=[
            "ws_bill_customer_sk",
            "ws_sold_date_sk",
            "ws_order_number",
            "ws_net_paid",
        ],
        index=False,
    )
    datedim_ddf = reader.read(
        "date_dim",
        relevant_cols=["d_date_sk", "d_date"],
        index=False,
    )

    if c:
        c.create_table("web_sales", ws_ddf, persist=False)
        c.create_table("store_sales", ss_ddf, persist=False)
        c.create_table("date_dim", datedim_ddf, persist=False)

    return ss_ddf, ws_ddf, datedim_ddf


def get_clusters(client, ml_input_df):
    """Cluster customers and attach cluster labels to their ids.

    Parameters
    ----------
    client : distributed.Client
        Dask client used to run the training task(s).
    ml_input_df : dask_cudf.DataFrame
        Feature frame indexed by customer id (``cid``).

    Returns
    -------
    dict
        The dict produced by ``train_clustering_model`` with its
        ``"cid_labels"`` entry replaced by a (cid, label) dataframe sorted
        by ``cid``.
    """
    fit_tasks = [
        delayed(train_clustering_model)(frame, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER)
        for frame in ml_input_df.to_delayed()
    ]
    # NOTE(review): unpacking with ``*fit_tasks`` yields a single dict result
    # only when ml_input_df has exactly one partition -- presumably callers
    # repartition before invoking this; confirm.
    results_dict = client.compute(*fit_tasks, sync=True)

    cid_frame = ml_input_df.index.to_frame().reset_index(drop=True)

    labels = dask_cudf.from_cudf(
        results_dict["cid_labels"], npartitions=cid_frame.npartitions
    )
    cid_frame["label"] = labels.reset_index()[0]

    # Sort based on CDH6.1 q25-result formatting.
    results_dict["cid_labels"] = cid_frame.sort_values(["cid"])
    return results_dict


# ---------------------------------------------------------------------------
# bdb_tools/q26_utils.py
# ---------------------------------------------------------------------------

# q26 filter parameters plus the same KMeans hyper-parameters as q25.
Q26_CATEGORY = "Books"
Q26_ITEM_COUNT = 5
N_CLUSTERS = 8
CLUSTER_ITERATIONS = 20
N_ITER = 5


def read_tables(config, c=None):
    """Load the tables required by query 26.

    Returns ``(ss_ddf, items_ddf)``; when a SQL context ``c`` is given,
    both tables are registered with it.
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    ss_ddf = reader.read(
        "store_sales",
        relevant_cols=["ss_customer_sk", "ss_item_sk"],
        index=False,
    )
    items_ddf = reader.read(
        "item",
        relevant_cols=["i_item_sk", "i_category", "i_class_id"],
        index=False,
    )

    if c:
        c.create_table("store_sales", ss_ddf, persist=False)
        c.create_table("item", items_ddf, persist=False)

    return ss_ddf, items_ddf
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import spacy

from bdb_tools.readers import build_reader

# q27 parameters: the reviewed item of interest and the sentence terminator.
q27_pr_item_sk = 10002
EOL_CHAR = "."


def read_tables(config, c=None):
    """Load the product_reviews table required by query 27.

    Row-group splitting is forced on here (rather than taken from config)
    for better parallelism over the large review text column.
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=True,
    )

    product_reviews_df = reader.read(
        "product_reviews",
        relevant_cols=["pr_item_sk", "pr_review_content", "pr_review_sk"],
    )

    if c:
        c.create_table("product_reviews", product_reviews_df, persist=False)

    return product_reviews_df


def ner_parser(df, col_string, batch_size=256):
    """Extract ORG-entity names from a text column via spaCy NER.

    Parameters
    ----------
    df : DataFrame
        Frame holding the text column; mutated in place.
    col_string : str
        Name of the column containing review text.
    batch_size : int, optional
        spaCy pipe batch size.

    Returns
    -------
    DataFrame
        ``df`` with a new ``company_name_list`` column: a comma-separated
        string of ORG entities per row.
    """
    spacy.require_gpu()
    nlp = spacy.load("en_core_web_sm")
    # Tagger and parser are not needed for NER; disabling them speeds the pipe.
    docs = nlp.pipe(
        df[col_string], disable=["tagger", "parser"], batch_size=batch_size
    )

    company_lists = []
    for doc in docs:
        orgs = ", ".join(ent.text for ent in doc.ents if ent.label_ == "ORG")
        company_lists.append(orgs)

    df["company_name_list"] = company_lists
    return df
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np
import cupy as cp
import cupy

import cudf

import dask

from cuml.feature_extraction.text import HashingVectorizer
from cuml.dask.naive_bayes import MultinomialNB as DistMNB
from cuml.dask.common import to_dask_cudf
from cuml.dask.common.input_utils import DistributedDataHandler

from distributed import wait

from uuid import uuid1

from bdb_tools.readers import build_reader

# Hashing-vectorizer configuration shared by all partitions.
N_FEATURES = 2 ** 23  # Spark is doing 2^20
ngram_range = (1, 2)
preprocessor = lambda s: s.str.lower()
norm = None
alternate_sign = False


def read_tables(config, c=None):
    """Load the product_reviews columns required by query 28.

    Row-group splitting is forced on for better parallelism over the large
    review-text column. When a SQL context ``c`` is given, the table is
    also registered with it.
    """
    table_reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=True,
    )

    columns = [
        "pr_review_content",
        "pr_review_rating",
        "pr_review_sk",
    ]
    pr_df = table_reader.read("product_reviews", relevant_cols=columns)

    if c:
        c.create_table("product_reviews", pr_df, persist=False)

    return pr_df


def gpu_hashing_vectorizer(x):
    """Hash one partition of text into a sparse feature matrix on GPU."""
    vec = HashingVectorizer(
        n_features=N_FEATURES,
        alternate_sign=alternate_sign,
        ngram_range=ngram_range,
        norm=norm,
        preprocessor=preprocessor,
    )
    return vec.fit_transform(x)


def map_labels(ser):
    """Collapse 1-5 star ratings into sentiment classes.

    Ratings 1-2 -> 0 (negative), 3 -> 1 (neutral), everything else
    (ratings 4-5) keeps the default fill value 2 (positive).
    """
    # NOTE(review): cudf.core.column.full is a private cudf API -- confirm it
    # still exists in the pinned cudf version.
    output_ser = cudf.Series(
        cudf.core.column.full(size=len(ser), fill_value=2, dtype=np.int32)
    )
    zero_flag = (ser == 1) | (ser == 2)
    output_ser.loc[zero_flag] = 0

    three_flag = ser == 3
    output_ser.loc[three_flag] = 1

    return output_ser


def build_features(t):
    """Vectorize the review text column into a persisted dask sparse array."""
    X = t["pr_review_content"]
    # The meta below tells dask the partitions are cupy CSR matrices.
    X = X.map_partitions(
        gpu_hashing_vectorizer,
        meta=dask.array.from_array(
            cupy.sparse.csr_matrix(cupy.zeros(1, dtype=cp.float32))
        ),
    )

    X = X.astype(np.float32).persist()
    X.compute_chunk_sizes()

    return X


def build_labels(reviews_df):
    """Map ratings to sentiment labels and persist them as cupy int32 chunks."""
    y = reviews_df["pr_review_rating"].map_partitions(map_labels)
    y = y.map_partitions(lambda x: cupy.asarray(x, cupy.int32)).persist()
    y.compute_chunk_sizes()

    return y


def categoricalize(num_sr):
    """Map numeric sentiment classes 0/1/2 to NEG/NEUT/POS strings."""
    return num_sr.astype("str").str.replace(["0", "1", "2"], ["NEG", "NEUT", "POS"])


def sum_tp_fp(y_y_pred, nclasses):
    """Per-class true/false positive counts for one partition.

    Parameters
    ----------
    y_y_pred : tuple
        ``(y, y_pred)`` cupy arrays for this partition.
    nclasses : int
        Number of classes; labels are assumed to be 0..nclasses-1.

    Returns
    -------
    cupy.ndarray
        ``(nclasses, 2)`` array of (tp, fp) per class.
    """
    y, y_pred = y_y_pred
    res = cp.zeros((nclasses, 2), order="F")

    for i in range(nclasses):
        pos_pred_ix = cp.where(y_pred == i)[0]

        # No predictions for this class: its tp/fp stay zero.
        # BUG FIX: this was `break`, which also zeroed every *later*
        # class's counts whenever one class had no predictions.
        if len(pos_pred_ix) == 0:
            continue

        tp_sum = (y_pred[pos_pred_ix] == y[pos_pred_ix]).sum()
        fp_sum = (y_pred[pos_pred_ix] != y[pos_pred_ix]).sum()
        res[i][0] = tp_sum
        res[i][1] = fp_sum
    return res


def precision_score(client, y, y_pred, average="binary"):
    """Distributed multi-class precision.

    Parameters
    ----------
    client : distributed.Client
    y, y_pred : dask arrays of labels / predictions.
    average : {"binary", "macro", anything-else}
        "binary" returns the positive-class precision (requires <= 2
        classes), "macro" the unweighted mean over classes, anything else
        the micro (global tp / (tp + fp)) precision.

    Raises
    ------
    ValueError
        If ``average="binary"`` with more than two classes, or fewer than
        two classes are present.
    """
    nclasses = len(cp.unique(y.map_blocks(lambda x: cp.unique(x)).compute()))

    if average == "binary" and nclasses > 2:
        raise ValueError

    if nclasses < 2:
        raise ValueError("Single class precision is not yet supported")

    ddh = DistributedDataHandler.create([y, y_pred])

    precision_scores = client.compute(
        [
            client.submit(sum_tp_fp, part, nclasses, workers=[worker])
            for worker, part in ddh.gpu_futures
        ],
        sync=True,
    )

    # Reduce the per-partition (tp, fp) counts.
    res = cp.zeros((nclasses, 2), order="F")
    for i in precision_scores:
        res += i

    if average == "binary" or average == "macro":

        prec = cp.zeros(nclasses)
        for i in range(nclasses):
            tp_sum, fp_sum = res[i]
            prec[i] = (tp_sum / (tp_sum + fp_sum)).item()

        if average == "binary":
            return prec[nclasses - 1].item()
        else:
            return prec.mean().item()
    else:
        global_tp = cp.sum(res[:, 0])
        global_fp = cp.sum(res[:, 1])

        # BUG FIX: parenthesized so .item() applies to the ratio and a
        # Python float is returned; previously .item() bound only to the
        # denominator sum, yielding a cupy scalar.
        return (global_tp / (global_tp + global_fp)).item()


def local_cm(y_y_pred, unique_labels, sample_weight):
    """Confusion matrix for one partition.

    Returns an ``(n_labels, n_labels)`` dense cupy array with rows indexed
    by true label and columns by prediction.
    """
    y_true, y_pred = y_y_pred
    labels = unique_labels

    n_labels = labels.size

    # Assume labels are monotonically increasing for now.

    # Intersect y_pred, y_true with labels, eliminate items not in labels.
    ind = cp.logical_and(y_pred < n_labels, y_true < n_labels)
    y_pred = y_pred[ind]
    y_true = y_true[ind]

    if sample_weight is None:
        sample_weight = cp.ones(y_true.shape[0], dtype=np.int64)
    else:
        sample_weight = cp.asarray(sample_weight)

    sample_weight = sample_weight[ind]

    # Scatter the weights into the matrix via a COO construction.
    cm = cp.sparse.coo_matrix(
        (sample_weight, (y_true, y_pred)),
        shape=(n_labels, n_labels),
        dtype=cp.float32,
    ).toarray()

    return cp.nan_to_num(cm)


def confusion_matrix(client, y_true, y_pred, normalize=None, sample_weight=None):
    """Distributed confusion matrix.

    ``normalize`` may be "true" (row-normalized), "pred" (column-normalized),
    "all" (grand-total normalized) or None (raw counts).
    """
    unique_classes = cp.unique(y_true.map_blocks(lambda x: cp.unique(x)).compute())
    nclasses = len(unique_classes)

    ddh = DistributedDataHandler.create([y_true, y_pred])

    cms = client.compute(
        [
            client.submit(
                local_cm, part, unique_classes, sample_weight, workers=[worker]
            )
            for worker, part in ddh.gpu_futures
        ],
        sync=True,
    )

    cm = cp.zeros((nclasses, nclasses))
    for i in cms:
        cm += i

    with np.errstate(all="ignore"):
        if normalize == "true":
            cm = cm / cm.sum(axis=1, keepdims=True)
        elif normalize == "pred":
            cm = cm / cm.sum(axis=0, keepdims=True)
        elif normalize == "all":
            cm = cm / cm.sum()
        cm = cp.nan_to_num(cm)

    return cm


def accuracy_score(client, y, y_hat):
    """Distributed accuracy: fraction of matching predictions."""
    ddh = DistributedDataHandler.create([y_hat, y])

    def _count_accurate_predictions(y_hat_y):
        y_hat, y = y_hat_y
        y_hat = cp.asarray(y_hat, dtype=y_hat.dtype)
        y = cp.asarray(y, dtype=y.dtype)
        return y.shape[0] - cp.count_nonzero(y - y_hat)

    # Unique key prefix so repeated calls don't collide in the scheduler.
    key = uuid1()

    futures = client.compute(
        [
            client.submit(
                _count_accurate_predictions,
                worker_future[1],
                workers=[worker_future[0]],
                key="%s-%s" % (key, idx),
            )
            for idx, worker_future in enumerate(ddh.gpu_futures)
        ],
        sync=True,
    )

    return sum(futures) / y.shape[0]


def post_etl_processing(client, train_data, test_data):
    """Train/evaluate the sentiment classifier and format q28's results.

    Returns ``(final_data, acc, prec, cmat)`` where ``final_data`` is the
    test frame (review id, rating, NEG/NEUT/POS prediction) sorted by
    ``pr_review_sk``.
    """
    # Feature engineering
    X_train = build_features(train_data)
    X_test = build_features(test_data)

    y_train = build_labels(train_data)
    y_test = build_labels(test_data)

    # Perform ML
    model = DistMNB(client=client, alpha=0.001)
    model.fit(X_train, y_train)

    ### this regression seems to be coming from here
    y_hat = model.predict(X_test).persist()

    # Compute distributed performance metrics
    acc = accuracy_score(client, y_test, y_hat)

    print("Accuracy: " + str(acc))
    prec = precision_score(client, y_test, y_hat, average="macro")

    print("Precision: " + str(prec))
    cmat = confusion_matrix(client, y_test, y_hat)

    print("Confusion Matrix: " + str(cmat))

    # Place results back in original Dataframe
    ddh = DistributedDataHandler.create(y_hat)
    test_preds = to_dask_cudf(
        [client.submit(cudf.Series, part) for w, part in ddh.gpu_futures]
    )

    test_preds = test_preds.map_partitions(categoricalize)

    test_data["prediction"] = test_preds

    final_data = test_data[["pr_review_sk", "pr_review_rating", "prediction"]].persist()

    final_data = final_data.sort_values("pr_review_sk").reset_index(drop=True)
    wait(final_data)
    return final_data, acc, prec, cmat
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from bdb_tools.readers import build_reader

# q29 result-set size limit.
q29_limit = 100


def read_tables(config, c=None):
    """Load the tables required by query 29.

    Parameters
    ----------
    config : dict
        Benchmark configuration; supplies ``file_format`` and ``data_dir``.
    c : optional
        SQL context. When provided, both tables are registered with it.

    Returns
    -------
    tuple
        ``(item_df, ws_df)`` dask dataframes for item and web_sales.
    """
    reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
    )

    item_df = reader.read(
        "item", relevant_cols=["i_item_sk", "i_category_id"]
    )
    ws_df = reader.read(
        "web_sales", relevant_cols=["ws_order_number", "ws_item_sk"]
    )

    if c:
        c.create_table("item", item_df, persist=False)
        c.create_table("web_sales", ws_df, persist=False)

    return item_df, ws_df
#
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# ---------------------------------------------------------------------------
# bdb_tools/q30_utils.py
# ---------------------------------------------------------------------------

from bdb_tools.readers import build_reader

# session timeout in secs
q30_session_timeout_inSec = 3600
# query output limit
q30_limit = 40


def read_tables(config, c=None):
    """Load the tables required by query 30.

    When a SQL context ``c`` is given, both tables are registered with it.

    Returns
    -------
    item_df : dask dataframe for the item table.

    NOTE(review): ``wcs_df`` is read (and registered with the SQL context)
    but not returned -- every sibling ``qXX_utils.read_tables`` returns all
    the tables it reads. Confirm the pure-dask q30 query does not need
    ``wcs_df`` from here; if it does, this should return
    ``item_df, wcs_df`` (a caller-visible change, so not made here).
    """
    table_reader = build_reader(
        data_format=config["file_format"],
        basepath=config["data_dir"],
        split_row_groups=config["split_row_groups"],
    )

    item_cols = ["i_category_id", "i_item_sk"]
    item_df = table_reader.read("item", relevant_cols=item_cols)

    wcs_cols = ["wcs_user_sk", "wcs_item_sk", "wcs_click_date_sk", "wcs_click_time_sk"]
    wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols)

    if c:
        c.create_table('web_clickstreams', wcs_df, persist=False)
        c.create_table('item', item_df, persist=False)

    return item_df


# ---------------------------------------------------------------------------
# bdb_tools/rmm_monitor.py
# ---------------------------------------------------------------------------

import os
import csv
import rmm
import tempfile
import asyncio

from dask.distributed import Client, Worker, WorkerPlugin

from typing import List


class DependencyInstaller(WorkerPlugin):
    """Worker plugin that conda-installs extra packages on each dask worker."""

    def __init__(self, dependencies: List[str]):
        # Fixed typo: attribute was previously spelled `_depencendies`.
        self._dependencies = " ".join(f"'{dep}'" for dep in dependencies)

    def setup(self, _worker: Worker):
        # Runs once per worker when the plugin is registered.
        os.system(
            "conda install -c rapidsai-nightly -c rapidsai -c nvidia "
            f"-c conda-forge -c defaults {self._dependencies}"
        )


# Wrap this in a method used to initialize the module + pass in the client instance
dependency_installer = DependencyInstaller(["pynvml"])

# client = Client()
# client.register_worker_plugin(dependency_installer)


class RMMResourceMonitor:
    """
    Distributed monitor for RMM resource allocations.

    Runs small callables on every dask worker via ``Client.run``; when no
    client is available it falls back to executing them in-process.
    """

    def __init__(self, client, outputdir='/tmp'):
        # Anything that is not a distributed Client triggers local execution.
        self._client = client if isinstance(client, Client) else None
        self._outputdir = outputdir

    def __dispatch__(self, method, **kwargs):
        """Run ``method(**kwargs)`` on all workers, or locally without a client.

        Returns the per-worker result mapping from ``Client.run`` (or the
        direct result in local mode).
        """
        if self._client:
            return self._client.run(method, **kwargs)
        # BUG FIX: previously `return method(*args, **kwargs)` referenced an
        # undefined name `args` and raised NameError on the local path.
        return method(**kwargs)

    def get_remote_output_dir(self):
        """Directory on the workers where RMM log files are written."""
        return self._outputdir

    def begin_logging(self, prefix="rmmlog"):
        """
        Enable RMM logging on every worker, one log file per worker process.
        """

        def _rmmlogstart(basedir, prefix):
            # Executed on the worker: import locally so the closure is
            # self-contained when shipped by dask.
            import os
            fname = f"{prefix}_{os.getpid()}.log"
            rmm.enable_logging(log_file_name=os.path.join(basedir, fname))
            return fname

        self.__dispatch__(
            _rmmlogstart, prefix=prefix, basedir=self.get_remote_output_dir()
        )

    def stop_logging(self):
        """
        Disable RMM logging on every worker and flush the log files.
        """

        def _rmmlogstop():
            rmm.disable_logging()

        self.__dispatch__(_rmmlogstop)

    def collect(self):
        """
        Retrieve the RMM log file names from every worker.

        Returns the per-worker mapping of log file names as reported by
        ``rmm.get_log_filenames``.

        NOTE(review): the original implementation referenced undefined names
        (``DaskDataframe``, ``localfile``) and could never run. Loading the
        log *contents* into a dataframe is still TODO.
        """

        def _collect():
            # Presumably maps GPU device id -> log file path; confirm
            # against the pinned rmm version.
            return rmm.get_log_filenames()

        return self.__dispatch__(_collect)
): """ Common utility to perform all steps needed to execute a dask-cudf version @@ -320,7 +320,7 @@ def run_bsql_query( dask_profile=config.get("dask_profile"), data_dir=data_dir, client=client, - bc=blazing_context, + c=sql_context, config=config, ) @@ -382,7 +382,7 @@ def gpubdb_argparser(): "sheet": os.environ.get("GOOGLE_SPREADSHEET_NAME"), "tab": os.environ.get("GOOGLE_SPREADSHEET_TAB"), "scheduler_file_path": os.environ.get("SCHEDULER_FILE"), - "benchmark_runner_include_bsql": os.environ.get("RUNNER_INCLUDE_BSQL"), + "benchmark_runner_include_sql": os.environ.get("RUNNER_INCLUDE_SQL"), } for key in args.keys(): @@ -789,7 +789,7 @@ def build_benchmark_googlesheet_payload(config): "Protocol": "UCX" if data.get("nvlink") == True else "TCP", "NVLINK": data.get("nvlink", "NA"), "Infiniband": data.get("infiniband", "NA"), - "Query Type": "blazing" if is_blazing_query() else "dask", + "Query Type": "sql" if is_sql_query() else "dask", "File Format": data.get("file_format"), "Time (seconds)": query_time + writing_time if query_time and writing_time @@ -810,7 +810,7 @@ def build_benchmark_googlesheet_payload(config): "Data Location": data.get("data_dir"), "Current Time": current_time, "cuDF Version": data.get("cudf"), - "BlazingSQL Version": data.get("blazingsql"), + "Dask SQL Version": data.get("sql"), "Dask Version": data.get("dask"), "Distributed Version": data.get("distributed"), "Dask-CUDA Version": data.get("dask-cuda"), @@ -827,15 +827,15 @@ def build_benchmark_googlesheet_payload(config): return payload -def is_blazing_query(): +def is_sql_query(): """ - Method that returns true if caller of the utility is a blazing query, returns false otherwise + Method that returns true if caller of the utility is a SQL query, returns false otherwise Assumes that caller is 3 levels above the stack - query_of_interest -> utils.push_to_google_sheet -> utils.build_payload -> utils.is_blazing_query + query_of_interest -> utils.push_to_google_sheet -> utils.build_payload -> 
utils.is_sql_query - Another potential solution is checking sys.modules.get("blazing") to check blazing is imported + Another potential solution is checking sys.modules.get("dask_sql") to check Dask-SQL is imported """ - return "bsql" in inspect.stack()[-3].function + return "sql" in inspect.stack()[-3].function def _get_benchmarked_method_time( @@ -866,7 +866,7 @@ def generate_library_information(): "dask-cuda", "rmm", "cupy", - "blazingsql", + "dask-sql", ] conda_list_command = ( @@ -904,7 +904,7 @@ def push_payload_to_googlesheet(config): payload = build_benchmark_googlesheet_payload(config) s = gc.open(config["sheet"]) tab = s.worksheet(config["tab"]) - tab.append_row(payload, value_input_option='USER_ENTERED') + tab.append_row(payload, value_input_option='USER_ENTERED', table_range='A2') ################################# diff --git a/gpu_bdb/benchmark_runner.py b/gpu_bdb/benchmark_runner.py index 6fdef1df..6c383ab5 100755 --- a/gpu_bdb/benchmark_runner.py +++ b/gpu_bdb/benchmark_runner.py @@ -21,9 +21,11 @@ def load_query(qnum, fn): return mod.main -dask_qnums = [str(i).zfill(2) for i in range(1, 31)] -bsql_qnums = [str(i).zfill(2) for i in range(1, 31)] +dask_qnums = [str(i).zfill(2) for i in map(int,os.getenv("DASK_QNUMS"," ".join(map(str,range(1, 31)))).split())] +sql_qnums = [str(i).zfill(2) for i in map(int,os.getenv("BSQL_QNUMS"," ".join(map(str,range(1, 31)))).split())] +from random import shuffle +shuffle(dask_qnums) if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster, import_query_libs @@ -32,66 +34,94 @@ def load_query(qnum, fn): import_query_libs() config = gpubdb_argparser() config["run_id"] = uuid.uuid4().hex - include_blazing = config.get("benchmark_runner_include_bsql") + + include_sql = config.get("benchmark_runner_include_sql") dask_queries = { qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}.py") for qnum in dask_qnums } - if include_blazing: - bsql_queries = { - qnum: load_query(qnum, 
f"queries/q{qnum}/gpu_bdb_query_{qnum}_sql.py") - for qnum in bsql_qnums + if include_sql: + sql_queries = { + qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}_dask_sql.py") + for qnum in sql_qnums + } + else: + dask_queries = { + qnum: load_query(qnum, f"queries/q{qnum}/gpu_bdb_query_{qnum}.py") + for qnum in dask_qnums } - client, bc = attach_to_cluster(config, create_blazing_context=include_blazing) + client, c = attach_to_cluster(config, create_sql_context=include_sql) # Preload required libraries for queries on all workers client.run(import_query_libs) base_path = os.getcwd() - # Run BSQL Queries - if include_blazing and len(bsql_qnums) > 0: - print("Blazing Queries") - for qnum, q_func in bsql_queries.items(): - print(qnum) - - qpath = f"{base_path}/queries/q{qnum}/" - os.chdir(qpath) - if os.path.exists("current_query_num.txt"): - os.remove("current_query_num.txt") - with open("current_query_num.txt", "w") as fp: - fp.write(qnum) - - for r in range(N_REPEATS): - run_query( - config=config, - client=client, - query_func=q_func, - blazing_context=bc, - ) - client.run(gc.collect) - client.run_on_scheduler(gc.collect) - gc.collect() - time.sleep(3) + if config.get('benchmark_runner_log_rmm', False) or config.get('benchmark_runner_log_tasks', False): + + from bdb_tools import RMMResourceMonitor + from bdb_tools import DaskTaskLogger + + rmm_analyzer=RMMResourceMonitor(client=client, + outputdir=os.getenv('OUTPUT_DIR', '/tmp')) + dasktasklog=DaskTaskLogger( client=client, + outputdir=os.getenv('OUTPUT_DIR', '/tmp')) + + orig_run_query=run_query + def logged_run_query( *args, **kwargs ): + rmm_analyzer.begin_logging( prefix=f"rmmlog{qnum}") + dasktasklog.mark_begin() + orig_run_query( *args, **kwargs ) + rmm_analyzer.stop_logging() + dasktasklog.save_tasks( prefix=f"dasktasklog{qnum}") + + run_query=logged_run_query + + # Run Dask SQL Queries + if include_sql and len(sql_qnums) > 0: + print("Dask SQL Queries") + for r in range(N_REPEATS): + for qnum, 
q_func in sql_queries.items(): + print(f"run {r+1}: q{qnum}") + + qpath = f"{base_path}/queries/q{qnum}/" + os.chdir(qpath) + if os.path.exists("current_query_num.txt"): + os.remove("current_query_num.txt") + with open("current_query_num.txt", "w") as fp: + fp.write(qnum) + + run_query( + config=config, + client=client, + query_func=q_func, + sql_context=c, + ) + client.run(gc.collect) + client.run_on_scheduler(gc.collect) + gc.collect() + time.sleep(3) # Run Pure Dask Queries if len(dask_qnums) > 0: print("Pure Dask Queries") - for qnum, q_func in dask_queries.items(): - print(qnum) - - qpath = f"{base_path}/queries/q{qnum}/" - os.chdir(qpath) - if os.path.exists("current_query_num.txt"): - os.remove("current_query_num.txt") - with open("current_query_num.txt", "w") as fp: - fp.write(qnum) - - for r in range(N_REPEATS): - run_query(config=config, client=client, query_func=q_func) - client.run(gc.collect) - client.run_on_scheduler(gc.collect) - gc.collect() - time.sleep(3) + for r in range(N_REPEATS): + for qnum, q_func in dask_queries.items(): + print(f"run {r+1}: q{qnum}") + + qpath = f"{base_path}/queries/q{qnum}/" + os.chdir(qpath) + if os.path.exists("current_query_num.txt"): + os.remove("current_query_num.txt") + with open("current_query_num.txt", "w") as fp: + fp.write(qnum) + + run_query(config=config, client=client, query_func=q_func) + client.run(gc.collect) + client.run_on_scheduler(gc.collect) + gc.collect() + time.sleep(3) + + diff --git a/gpu_bdb/benchmark_runner/benchmark_config.yaml b/gpu_bdb/benchmark_runner/benchmark_config.yaml index 54d6bfb8..95af4169 100755 --- a/gpu_bdb/benchmark_runner/benchmark_config.yaml +++ b/gpu_bdb/benchmark_runner/benchmark_config.yaml @@ -1,19 +1,21 @@ # benchmark config yaml ### Please fill these accordingly -data_dir: +data_dir: /raid/gpu-bdb/sf1000/parquet_2gb output_dir: file_format: parquet output_filetype: parquet split_row_groups: False repartition_small_table: True -benchmark_runner_include_bsql: 
+benchmark_runner_include_sql: +benchmark_runner_log_rmm: False +benchmark_runner_log_tasks: False -scheduler_file_path: +scheduler_file_path: /raid/adattagupta/dask-sql-work/dask-local-directory/scheduler.json dask_profile: False verify_results: False verify_dir: -sheet: -tab: +sheet: GPU-BDB Dask-SQL +tab: SF1K Dask-SQL get_read_time: False diff --git a/gpu_bdb/queries/load_test/gpu_bdb_load_test.py b/gpu_bdb/queries/load_test/gpu_bdb_load_test.py index ab57193f..830e9977 100755 --- a/gpu_bdb/queries/load_test/gpu_bdb_load_test.py +++ b/gpu_bdb/queries/load_test/gpu_bdb_load_test.py @@ -24,7 +24,7 @@ tables = [table.split(".")[0] for table in os.listdir(spark_schema_dir)] scale = [x for x in config["data_dir"].split("/") if "sf" in x][0] -part_size = 3 +part_size = 2 chunksize = "128 MiB" # Spark uses different names for column types, and RAPIDS doesn't yet support Decimal types. @@ -127,7 +127,7 @@ def repartition(table, outdir, npartitions=None, chunksize=None, compression="sn ) read_csv_table(table, chunksize).repartition( npartitions=npartitions - ).to_parquet(outdir + table, compression=compression) + ).to_parquet(outdir + table, compression=compression, index=False) def main(client, config): diff --git a/gpu_bdb/queries/q01/gpu_bdb_query_01.py b/gpu_bdb/queries/q01/gpu_bdb_query_01.py index 041f674c..6cd6bbd7 100755 --- a/gpu_bdb/queries/q01/gpu_bdb_query_01.py +++ b/gpu_bdb/queries/q01/gpu_bdb_query_01.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,8 +15,14 @@ # from bdb_tools.utils import benchmark, gpubdb_argparser, run_query -from bdb_tools.readers import build_reader +from bdb_tools.q01_utils import ( + q01_i_category_id_IN, + q01_ss_store_sk_IN, + q01_viewed_together_count, + q01_limit, + read_tables +) ### Implementation Notes: # `drop_duplicates` and `groupby` by default brings result to single partition @@ -27,30 +33,6 @@ # Settinng index + merge using map_parition can be a work-around if dask native merge is slow -# -------- Q1 ----------- -q01_i_category_id_IN = [1, 2, 3] -# -- sf1 -> 11 stores, 90k sales in 820k lines -q01_ss_store_sk_IN = [10, 20, 33, 40, 50] -q01_viewed_together_count = 50 -q01_limit = 100 - - -item_cols = ["i_item_sk", "i_category_id"] -ss_cols = ["ss_item_sk", "ss_store_sk", "ss_ticket_number"] - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_df = table_reader.read("item", relevant_cols=item_cols) - ss_df = table_reader.read("store_sales", relevant_cols=ss_cols) - return item_df, ss_df - - ### Inner Self join to get pairs # Select t1.ss_item_sk as item_sk_1 , t2.ss_item_sk as item_sk_2 # FROM ( @@ -163,8 +145,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q01/gpu_bdb_query_01_dask_sql.py b/gpu_bdb/queries/q01/gpu_bdb_query_01_dask_sql.py new file mode 100755 index 00000000..9d0f21ad --- /dev/null +++ b/gpu_bdb/queries/q01/gpu_bdb_query_01_dask_sql.py @@ -0,0 +1,77 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q01_utils import ( + q01_i_category_id_IN, + q01_ss_store_sk_IN, + q01_viewed_together_count, + q01_limit, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_distinct = f""" + SELECT DISTINCT ss_item_sk, ss_ticket_number + FROM store_sales s, item i + WHERE s.ss_item_sk = i.i_item_sk + AND i.i_category_id IN {q01_i_category_id_IN} + AND s.ss_store_sk IN {q01_ss_store_sk_IN} + """ + result_distinct = c.sql(query_distinct) + + result_distinct = result_distinct.persist() + wait(result_distinct) + c.create_table("distinct_table", result_distinct, persist=False) + + query = f""" + SELECT item_sk_1, item_sk_2, COUNT(*) AS cnt + FROM + ( + SELECT CAST(t1.ss_item_sk as BIGINT) AS item_sk_1, + CAST(t2.ss_item_sk AS BIGINT) AS item_sk_2 + FROM distinct_table t1 + INNER JOIN distinct_table t2 + ON t1.ss_ticket_number = t2.ss_ticket_number + WHERE t1.ss_item_sk < t2.ss_item_sk + ) + GROUP BY item_sk_1, item_sk_2 + HAVING COUNT(*) > {q01_viewed_together_count} + ORDER BY cnt DESC, CAST(item_sk_1 AS VARCHAR), + CAST(item_sk_2 AS VARCHAR) + LIMIT {q01_limit} + """ + result = c.sql(query) + + c.drop_table("distinct_table") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + 
run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q02/gpu_bdb_query_02.py b/gpu_bdb/queries/q02/gpu_bdb_query_02.py index cc7cb5a5..c6c11e40 100755 --- a/gpu_bdb/queries/q02/gpu_bdb_query_02.py +++ b/gpu_bdb/queries/q02/gpu_bdb_query_02.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,8 +19,13 @@ gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader from bdb_tools.sessionization import get_distinct_sessions +from bdb_tools.q02_utils import ( + q02_item_sk, + q02_limit, + q02_session_timeout_inSec, + read_tables +) ### Implementation Notes: @@ -28,13 +33,6 @@ # The bottleneck of current implimenation is `set-index`, once ucx is working correctly # it should go away -# -------- Q2 ----------- -q02_item_sk = 10001 -q02_MAX_ITEMS_PER_BASKET = 5000000 -q02_limit = 30 -q02_session_timeout_inSec = 3600 - - def get_relevant_item_series(df, q02_item_sk): """ Returns relevant items directly @@ -65,17 +63,6 @@ def reduction_function(df, q02_session_timeout_inSec): return grouped_df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - wcs_cols = ["wcs_user_sk", "wcs_item_sk", "wcs_click_date_sk", "wcs_click_time_sk"] - wcs_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) - return wcs_df - - def pre_repartition_task(wcs_df): f_wcs_df = wcs_df[ @@ -149,8 +136,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q02/gpu_bdb_query_02_dask_sql.py 
b/gpu_bdb/queries/q02/gpu_bdb_query_02_dask_sql.py new file mode 100755 index 00000000..38c1668f --- /dev/null +++ b/gpu_bdb/queries/q02/gpu_bdb_query_02_dask_sql.py @@ -0,0 +1,88 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.sessionization import get_distinct_sessions + +from bdb_tools.q02_utils import ( + q02_item_sk, + q02_limit, + q02_session_timeout_inSec, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = """ + SELECT + CAST(wcs_user_sk AS INTEGER) AS wcs_user_sk, + CAST(wcs_item_sk AS INTEGER) AS wcs_item_sk, + (wcs_click_date_sk * 86400 + wcs_click_time_sk) AS tstamp_inSec + FROM web_clickstreams + WHERE wcs_item_sk IS NOT NULL + AND wcs_user_sk IS NOT NULL + DISTRIBUTE BY wcs_user_sk + """ + wcs_result = c.sql(query_1) + + session_df = wcs_result.map_partitions( + get_distinct_sessions, + keep_cols=["wcs_user_sk", "wcs_item_sk"], + time_out=q02_session_timeout_inSec, + ) + del wcs_result + + c.create_table('session_df', session_df, persist=False) + + last_query = f""" + WITH item_df AS ( + SELECT wcs_user_sk, session_id + FROM session_df + WHERE wcs_item_sk = {q02_item_sk} + ) + SELECT sd.wcs_item_sk as item_sk_1, + count(sd.wcs_item_sk) as cnt + FROM 
session_df sd + INNER JOIN item_df id + ON sd.wcs_user_sk = id.wcs_user_sk + AND sd.session_id = id.session_id + AND sd.wcs_item_sk <> {q02_item_sk} + GROUP BY sd.wcs_item_sk + ORDER BY cnt desc + LIMIT {q02_limit} + """ + result = c.sql(last_query) + result["item_sk_2"] = q02_item_sk + result_order = ["item_sk_1", "item_sk_2", "cnt"] + result = result[result_order] + + del session_df + c.drop_table("session_df") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q03/gpu_bdb_query_03.py b/gpu_bdb/queries/q03/gpu_bdb_query_03.py index 68a04af4..a563ff83 100755 --- a/gpu_bdb/queries/q03/gpu_bdb_query_03.py +++ b/gpu_bdb/queries/q03/gpu_bdb_query_03.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,34 +14,32 @@ # limitations under the License. 
# -import sys import os +import cudf +import dask_cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader + +from bdb_tools.q03_utils import ( + apply_find_items_viewed, + q03_purchased_item_IN, + q03_purchased_item_category_IN, + q03_limit, + read_tables +) from distributed import wait import numpy as np -from numba import cuda import glob from dask import delayed - -q03_days_in_sec_before_purchase = 864000 -q03_views_before_purchase = 5 -q03_purchased_item_IN = 10001 -q03_purchased_item_category_IN = [2, 3] -q03_limit = 100 - - def get_wcs_minima(config): - import dask_cudf wcs_df = dask_cudf.read_parquet( os.path.join(config["data_dir"], "web_clickstreams/*.parquet"), @@ -55,7 +53,6 @@ def get_wcs_minima(config): def pre_repartition_task(wcs_fn, item_df, wcs_tstamp_min): - import cudf wcs_cols = [ "wcs_user_sk", @@ -65,7 +62,7 @@ def pre_repartition_task(wcs_fn, item_df, wcs_tstamp_min): "wcs_click_time_sk", ] wcs_df = cudf.read_parquet(wcs_fn, columns=wcs_cols) - wcs_df = wcs_df._drop_na_rows(subset=["wcs_user_sk", "wcs_item_sk"]) + wcs_df = wcs_df.dropna(axis=0, subset=["wcs_user_sk", "wcs_item_sk"]) wcs_df["tstamp"] = wcs_df["wcs_click_date_sk"] * 86400 + wcs_df["wcs_click_time_sk"] wcs_df["tstamp"] = wcs_df["tstamp"] - wcs_tstamp_min @@ -108,108 +105,7 @@ def reduction_function(df, item_df_filtered): return grouped_df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_cols = ["i_category_id", "i_item_sk"] - item_df = table_reader.read("item", relevant_cols=item_cols) - return item_df - - -@cuda.jit -def find_items_viewed_before_purchase_kernel( - relevant_idx_col, user_col, timestamp_col, item_col, out_col, N -): - """ - Find the past N items viewed after a relevant purchase was made, - as defined by the configuration of this query. 
- """ - i = cuda.grid(1) - relevant_item = q03_purchased_item_IN - - if i < (relevant_idx_col.size): # boundary guard - # every relevant row gets N rows in the output, so we need to map the indexes - # back into their position in the original array - orig_idx = relevant_idx_col[i] - current_user = user_col[orig_idx] - - # look at the previous N clicks (assume sorted descending) - rows_to_check = N - remaining_rows = user_col.size - orig_idx - - if remaining_rows <= rows_to_check: - rows_to_check = remaining_rows - 1 - - for k in range(1, rows_to_check + 1): - if current_user != user_col[orig_idx + k]: - out_col[i * N + k - 1] = 0 - - # only checking relevant purchases via the relevant_idx_col - elif (timestamp_col[orig_idx + k] <= timestamp_col[orig_idx]) & ( - timestamp_col[orig_idx + k] - >= (timestamp_col[orig_idx] - q03_days_in_sec_before_purchase) - ): - out_col[i * N + k - 1] = item_col[orig_idx + k] - else: - out_col[i * N + k - 1] = 0 - - -def apply_find_items_viewed(df, item_mappings): - import cudf - - # need to sort descending to ensure that the - # next N rows are the previous N clicks - df = df.sort_values( - by=["wcs_user_sk", "tstamp", "wcs_sales_sk", "wcs_item_sk"], - ascending=[False, False, False, False], - ) - df.reset_index(drop=True, inplace=True) - df["relevant_flag"] = (df.wcs_sales_sk != 0) & ( - df.wcs_item_sk == q03_purchased_item_IN - ) - df["relevant_idx_pos"] = df.index.to_series() - df.reset_index(drop=True, inplace=True) - # only allocate output for the relevant rows - sample = df.loc[df.relevant_flag == True] - sample.reset_index(drop=True, inplace=True) - - N = q03_views_before_purchase - size = len(sample) - - # we know this can be int32, since it's going to contain item_sks - out_arr = cuda.device_array(size * N, dtype=df["wcs_item_sk"].dtype) - - find_items_viewed_before_purchase_kernel.forall(size)( - sample["relevant_idx_pos"], - df["wcs_user_sk"], - df["tstamp"], - df["wcs_item_sk"], - out_arr, - N, - ) - - result = 
cudf.DataFrame({"prior_item_viewed": out_arr}) - - del out_arr - del df - del sample - - filtered = result.merge( - item_mappings, - how="inner", - left_on=["prior_item_viewed"], - right_on=["i_item_sk"], - ) - return filtered - - def main(client, config): - import dask_cudf - import cudf item_df = benchmark( read_tables, @@ -289,8 +185,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q03/gpu_bdb_query_03_dask_sql.py b/gpu_bdb/queries/q03/gpu_bdb_query_03_dask_sql.py new file mode 100755 index 00000000..031dfe0f --- /dev/null +++ b/gpu_bdb/queries/q03/gpu_bdb_query_03_dask_sql.py @@ -0,0 +1,101 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q03_utils import ( + apply_find_items_viewed, + q03_purchased_item_IN, + q03_purchased_item_category_IN, + q03_limit, + read_tables +) + +from dask.distributed import wait + + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = """ + SELECT i_item_sk, + CAST(i_category_id AS TINYINT) AS i_category_id + FROM item + """ + item_df = c.sql(query_1) + + item_df = item_df.persist() + wait(item_df) + c.create_table("item_df", item_df, persist=False) + + query_2 = """ + SELECT CAST(w.wcs_user_sk AS INTEGER) as wcs_user_sk, + wcs_click_date_sk * 86400 + wcs_click_time_sk AS tstamp, + CAST(w.wcs_item_sk AS INTEGER) as wcs_item_sk, + CAST(COALESCE(w.wcs_sales_sk, 0) AS INTEGER) as wcs_sales_sk + FROM web_clickstreams AS w + INNER JOIN item_df AS i ON w.wcs_item_sk = i.i_item_sk + WHERE w.wcs_user_sk IS NOT NULL + AND w.wcs_item_sk IS NOT NULL + DISTRIBUTE BY wcs_user_sk + """ + merged_df = c.sql(query_2) + + query_3 = f""" + SELECT i_item_sk, i_category_id + FROM item_df + WHERE i_category_id IN {q03_purchased_item_category_IN} + """ + item_df_filtered = c.sql(query_3) + + product_view_results = merged_df.map_partitions( + apply_find_items_viewed, item_mappings=item_df_filtered + ) + + + c.drop_table("item_df") + del item_df + del merged_df + del item_df_filtered + + c.create_table('product_result', product_view_results, persist=False) + + last_query = f""" + SELECT CAST({q03_purchased_item_IN} AS BIGINT) AS purchased_item, + i_item_sk AS lastviewed_item, + COUNT(i_item_sk) AS cnt + FROM product_result + GROUP BY i_item_sk + ORDER BY purchased_item, cnt desc, lastviewed_item + LIMIT {q03_limit} + """ + result = c.sql(last_query) + + c.drop_table("product_result") + del product_view_results + return result + + +if __name__ == "__main__": + 
config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q04/gpu_bdb_query_04.py b/gpu_bdb/queries/q04/gpu_bdb_query_04.py index 9c4cb5a5..98fba61e 100755 --- a/gpu_bdb/queries/q04/gpu_bdb_query_04.py +++ b/gpu_bdb/queries/q04/gpu_bdb_query_04.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,100 +14,20 @@ # limitations under the License. # -import sys - +import cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader -from bdb_tools.sessionization import get_sessions - - -# parameters -q04_session_timeout_inSec = 3600 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - wp_cols = ["wp_type", "wp_web_page_sk"] - wp_df = table_reader.read("web_page", relevant_cols=wp_cols) - - wcs_cols = [ - "wcs_user_sk", - "wcs_click_date_sk", - "wcs_click_time_sk", - "wcs_web_page_sk", - "wcs_sales_sk", - ] - web_clicksteams_df = table_reader.read("web_clickstreams", relevant_cols=wcs_cols) - - return wp_df, web_clicksteams_df - - -def abandonedShoppingCarts(df, DYNAMIC_CAT_CODE, ORDER_CAT_CODE): - import cudf - - # TODO: test without reset index - df.reset_index(drop=True, inplace=True) - - # Select groups where last dynamic row comes after last order row - filtered_df = df[ - (df["wp_type_codes"] == ORDER_CAT_CODE) - | (df["wp_type_codes"] == DYNAMIC_CAT_CODE) - ] - # TODO: test without reset index - filtered_df.reset_index(drop=True, inplace=True) - # Create a new column that is the concatenation of timestamp and wp_type_codes - # (eg:123456:3, 
234567:5) - filtered_df["wp_type_codes"] = ( - filtered_df["tstamp_inSec"] - .astype("str") - .str.cat(filtered_df["wp_type_codes"].astype("str"), sep=":") - ) - # This gives the last occurrence (by timestamp) within the "order", "dynamic" wp_types - filtered_df = filtered_df.groupby( - ["wcs_user_sk", "session_id"], as_index=False, sort=False - ).agg({"wp_type_codes": "max"}) - # If the max contains dynamic, keep the row else discard. - last_dynamic_df = filtered_df[ - filtered_df["wp_type_codes"].str.contains( - ":" + str(DYNAMIC_CAT_CODE), regex=False - ) - ] - del filtered_df - - # Find counts for each group - grouped_count_df = df.groupby( - ["wcs_user_sk", "session_id"], as_index=False, sort=False - ).agg({"tstamp_inSec": "count"}) - # Merge counts with the "dynamic" shopping cart groups - result = last_dynamic_df.merge( - grouped_count_df, on=["wcs_user_sk", "session_id"], how="inner" - ) - del (last_dynamic_df, grouped_count_df) - return cudf.DataFrame( - {"pagecount": result.tstamp_inSec.sum(), "count": len(result)} - ) - - -def reduction_function(df, keep_cols, DYNAMIC_CAT_CODE, ORDER_CAT_CODE): - df = get_sessions(df, keep_cols=keep_cols) - df = abandonedShoppingCarts( - df, DYNAMIC_CAT_CODE=DYNAMIC_CAT_CODE, ORDER_CAT_CODE=ORDER_CAT_CODE - ) - return df +from bdb_tools.q04_utils import ( + reduction_function, + read_tables +) def main(client, config): - import cudf wp, wcs_df = benchmark( read_tables, @@ -166,8 +86,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q04/gpu_bdb_query_04_dask_sql.py b/gpu_bdb/queries/q04/gpu_bdb_query_04_dask_sql.py new file mode 100755 index 00000000..3af8ef10 --- /dev/null +++ b/gpu_bdb/queries/q04/gpu_bdb_query_04_dask_sql.py @@ -0,0 +1,94 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cudf + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q04_utils import ( + reduction_function, + read_tables +) + +from dask.distributed import wait + + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_web_page = """ + SELECT wp_type, wp_web_page_sk + FROM web_page_wo_categorical + """ + wp = c.sql(query_web_page) + + # Convert wp_type to categorical and get cat_id of review and dynamic type + wp["wp_type"] = wp["wp_type"].map_partitions( + lambda ser: ser.astype("category")) + + cpu_categories = wp["wp_type"].compute().cat.categories.to_pandas() + + DYNAMIC_CAT_CODE = cpu_categories.get_loc("dynamic") + ORDER_CAT_CODE = cpu_categories.get_loc("order") + + # ### cast to minimum viable dtype + wp["wp_type_codes"] = wp["wp_type"].cat.codes + cols_2_keep = ["wp_web_page_sk", "wp_type_codes"] + wp = wp[cols_2_keep] + + wp = wp.persist() + wait(wp) + c.create_table('web_page', wp, persist=False) + + query = """ + SELECT + c.wcs_user_sk, + w.wp_type_codes, + (wcs_click_date_sk * 86400 + wcs_click_time_sk) AS tstamp_inSec + FROM web_clickstreams c, web_page w + WHERE c.wcs_web_page_sk = w.wp_web_page_sk + AND c.wcs_web_page_sk IS NOT NULL + AND c.wcs_user_sk IS NOT NULL + AND c.wcs_sales_sk IS NULL --abandoned implies: no sale + 
DISTRIBUTE BY wcs_user_sk + """ + merged_df = c.sql(query) + + keep_cols = ["wcs_user_sk", "wp_type_codes", "tstamp_inSec"] + result_df = merged_df.map_partitions( + reduction_function, keep_cols, DYNAMIC_CAT_CODE, ORDER_CAT_CODE + ) + + result = result_df["pagecount"].sum() / result_df["count"].sum() + # Persist before computing to ensure scalar transfer only on compute + result = result.persist() + + result = result.compute() + result_df = cudf.DataFrame({"sum(pagecount)/count(*)": [result]}) + c.drop_table("web_page") + return result_df + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q05/gpu_bdb_query_05.py b/gpu_bdb/queries/q05/gpu_bdb_query_05.py index 290cf127..5e99a10f 100755 --- a/gpu_bdb/queries/q05/gpu_bdb_query_05.py +++ b/gpu_bdb/queries/q05/gpu_bdb_query_05.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,25 +14,27 @@ # limitations under the License. 
# -import sys import os import glob +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader -from bdb_tools.cupy_metrics import cupy_precision_score +from bdb_tools.q05_utils import ( + build_and_predict_model, + wcs_columns, + read_tables +) -import cupy as cp import numpy as np from dask import delayed -import dask import pandas as pd -from sklearn.metrics import roc_auc_score # # Query Configuration @@ -40,84 +42,10 @@ COLLEGE_ED_STRS = ["Advanced Degree", "College", "4 yr Degree", "2 yr Degree"] Q05_I_CATEGORY = "Books" -wcs_columns = ["wcs_item_sk", "wcs_user_sk"] -items_columns = ["i_item_sk", "i_category", "i_category_id"] -customer_columns = ["c_customer_sk", "c_current_cdemo_sk"] -customer_dem_columns = ["cd_demo_sk", "cd_gender", "cd_education_status"] - -# Logistic Regression params -# solver = "LBFGS" Used by passing `penalty=None` or "l2" -# step_size = 1 Not used -# numCorrections = 10 Not used -iterations = 100 -C = 10_000 # reg_lambda = 0 hence C for model is a large value -convergence_tol = 1e-9 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_ddf = table_reader.read("item", relevant_cols=items_columns, index=False) - customer_ddf = table_reader.read( - "customer", relevant_cols=customer_columns, index=False - ) - customer_dem_ddf = table_reader.read( - "customer_demographics", relevant_cols=customer_dem_columns, index=False - ) - - return (item_ddf, customer_ddf, customer_dem_ddf) - - -def build_and_predict_model(ml_input_df): - """ - Create a standardized feature matrix X and target array y. 
- Returns the model and accuracy statistics - """ - import cuml - from cuml.metrics import confusion_matrix - - feature_names = ["college_education", "male"] + [ - "clicks_in_%d" % i for i in range(1, 8) - ] - X = ml_input_df[feature_names] - # Standardize input matrix - X = (X - X.mean()) / X.std() - y = ml_input_df["clicks_in_category"] - - model = cuml.LogisticRegression( - tol=convergence_tol, - penalty="none", - solver="qn", - fit_intercept=True, - max_iter=iterations, - C=C, - ) - model.fit(X, y) - # - # Predict and evaluate accuracy - # (Should be 1.0) at SF-1 - # - results_dict = {} - y_pred = model.predict(X) - - results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array()) - results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred)) - results_dict["confusion_matrix"] = confusion_matrix( - cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32") - ) - results_dict["output_type"] = "supervised" - return results_dict - - def get_groupby_results(file_list, item_df): """ Functionial approach for better scaling """ - import cudf sum_by_cat_ddf = None for fn in file_list: @@ -129,12 +57,12 @@ def get_groupby_results(file_list, item_df): keep_cols = ["wcs_user_sk", "i_category_id", "clicks_in_category"] wcs_ddf = wcs_ddf[keep_cols] - wcs_ddf = cudf.DataFrame.one_hot_encoding( + wcs_ddf = cudf.get_dummies( wcs_ddf, - column="i_category_id", + columns=["i_category_id"], prefix="clicks_in", prefix_sep="_", - cats=[i for i in range(1, 8)], + cats={"i_category_id":np.arange(1, 8, dtype="int32")}, dtype=np.int8, ) keep_cols = ["wcs_user_sk", "clicks_in_category"] + [ @@ -162,8 +90,6 @@ def get_groupby_results(file_list, item_df): def main(client, config): - import cudf - import dask_cudf item_ddf, customer_ddf, customer_dem_ddf = benchmark( read_tables, @@ -268,9 +194,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf - import cuml 
config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q05/gpu_bdb_query_05_dask_sql.py b/gpu_bdb/queries/q05/gpu_bdb_query_05_dask_sql.py new file mode 100755 index 00000000..e0a628ca --- /dev/null +++ b/gpu_bdb/queries/q05/gpu_bdb_query_05_dask_sql.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster +from dask.distributed import wait +from dask import delayed + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) +from bdb_tools.q05_utils import ( + build_and_predict_model, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + SELECT + --wcs_user_sk, + clicks_in_category, + CASE WHEN cd_education_status IN ('Advanced Degree', 'College', '4 yr Degree', '2 yr Degree') + THEN 1 ELSE 0 END AS college_education, + CASE WHEN cd_gender = 'M' THEN 1 ELSE 0 END AS male, + clicks_in_1, + clicks_in_2, + clicks_in_3, + clicks_in_4, + clicks_in_5, + clicks_in_6, + clicks_in_7 + FROM + ( + SELECT + wcs_user_sk, + SUM( CASE WHEN i_category = 'Books' THEN 1 ELSE 0 END) AS clicks_in_category, + SUM( CASE WHEN i_category_id = 1 THEN 1 ELSE 0 END) AS clicks_in_1, + SUM( CASE WHEN i_category_id = 2 THEN 1 ELSE 0 END) AS clicks_in_2, + SUM( CASE WHEN i_category_id = 3 THEN 1 ELSE 0 END) AS 
clicks_in_3, + SUM( CASE WHEN i_category_id = 4 THEN 1 ELSE 0 END) AS clicks_in_4, + SUM( CASE WHEN i_category_id = 5 THEN 1 ELSE 0 END) AS clicks_in_5, + SUM( CASE WHEN i_category_id = 6 THEN 1 ELSE 0 END) AS clicks_in_6, + SUM( CASE WHEN i_category_id = 7 THEN 1 ELSE 0 END) AS clicks_in_7 + FROM web_clickstreams + INNER JOIN item it ON + ( + wcs_item_sk = i_item_sk + AND wcs_user_sk IS NOT NULL + ) + GROUP BY wcs_user_sk + ) q05_user_clicks_in_cat + INNER JOIN customer ct ON wcs_user_sk = c_customer_sk + INNER JOIN customer_demographics ON c_current_cdemo_sk = cd_demo_sk + """ + + cust_and_clicks_ddf = c.sql(query) + + cust_and_clicks_ddf = cust_and_clicks_ddf.repartition(npartitions=1) + + # Convert clicks_in_category to a binary label + cust_and_clicks_ddf["clicks_in_category"] = ( + cust_and_clicks_ddf["clicks_in_category"] + > cust_and_clicks_ddf["clicks_in_category"].mean() + ).astype("int64") + + # Converting the dataframe to float64 as cuml logistic reg requires this + ml_input_df = cust_and_clicks_ddf.astype("float64") + + ml_input_df = ml_input_df.persist() + wait(ml_input_df) + + ml_tasks = [delayed(build_and_predict_model)(df) for df in ml_input_df.to_delayed()] + results_dict = client.compute(*ml_tasks, sync=True) + + return results_dict + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q06/gpu_bdb_query_06.py b/gpu_bdb/queries/q06/gpu_bdb_query_06.py index b7326ab3..9e3e9ff7 100755 --- a/gpu_bdb/queries/q06/gpu_bdb_query_06.py +++ b/gpu_bdb/queries/q06/gpu_bdb_query_06.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,64 +14,17 @@ # limitations under the License. 
# -import sys - - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader -from distributed import wait - - -q06_YEAR = 2001 -q6_limit_rows = 100 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - web_sales_cols = [ - "ws_bill_customer_sk", - "ws_sold_date_sk", - "ws_ext_list_price", - "ws_ext_wholesale_cost", - "ws_ext_discount_amt", - "ws_ext_sales_price", - ] - store_sales_cols = [ - "ss_customer_sk", - "ss_sold_date_sk", - "ss_ext_list_price", - "ss_ext_wholesale_cost", - "ss_ext_discount_amt", - "ss_ext_sales_price", - ] - date_cols = ["d_date_sk", "d_year", "d_moy"] - customer_cols = [ - "c_customer_sk", - "c_customer_id", - "c_email_address", - "c_first_name", - "c_last_name", - "c_preferred_cust_flag", - "c_birth_country", - "c_login", - ] - - ws_df = table_reader.read("web_sales", relevant_cols=web_sales_cols) - ss_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - date_df = table_reader.read("date_dim", relevant_cols=date_cols) - customer_df = table_reader.read("customer", relevant_cols=customer_cols) - - return (ws_df, ss_df, date_df, customer_df) +from bdb_tools.q06_utils import ( + q06_YEAR, + q06_LIMIT, + read_tables +) def get_sales_ratio(df, table="store_sales"): assert table in ("store_sales", "web_sales") @@ -247,13 +200,11 @@ def main(client, config): ) ) - return result_df.head(q6_limit_rows) + return result_df.head(q06_LIMIT) if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q06/gpu_bdb_query_06_dask_sql.py b/gpu_bdb/queries/q06/gpu_bdb_query_06_dask_sql.py new file mode 100755 index 00000000..736319c4 --- /dev/null +++ b/gpu_bdb/queries/q06/gpu_bdb_query_06_dask_sql.py @@ 
-0,0 +1,103 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q06_utils import ( + q06_LIMIT, + q06_YEAR, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + WITH temp_table_1 as + ( + SELECT ss_customer_sk AS customer_sk, + sum( case when (d_year = {q06_YEAR}) THEN (((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2.0) ELSE 0.0 END) + AS first_year_total, + sum( case when (d_year = {q06_YEAR + 1}) THEN (((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2.0) ELSE 0.0 END) + AS second_year_total + FROM store_sales, + date_dim + WHERE ss_sold_date_sk = d_date_sk + AND d_year BETWEEN {q06_YEAR} AND {q06_YEAR + 1} + GROUP BY ss_customer_sk + -- first_year_total is an aggregation, rewrite all sum () statement + HAVING sum( case when (d_year = {q06_YEAR}) THEN (((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2.0) ELSE 0.0 END) > 0.0 + ), + temp_table_2 AS + ( + SELECT ws_bill_customer_sk AS customer_sk , + sum( case when (d_year = {q06_YEAR}) THEN (((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2.0) ELSE 0.0 END) + AS 
first_year_total, + sum( case when (d_year = {q06_YEAR + 1}) THEN (((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2.0) ELSE 0.0 END) + AS second_year_total + FROM web_sales, + date_dim + WHERE ws_sold_date_sk = d_date_sk + AND d_year BETWEEN {q06_YEAR} AND {q06_YEAR + 1} + GROUP BY ws_bill_customer_sk + -- required to avoid division by 0, because later we will divide by this value + HAVING sum( case when (d_year = {q06_YEAR}) THEN (((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2.0)ELSE 0.0 END) > 0.0 + ) + -- MAIN QUERY + SELECT + CAST( (web.second_year_total / web.first_year_total) AS DOUBLE) AS web_sales_increase_ratio, + c_customer_sk, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login, + c_email_address + FROM temp_table_1 store, + temp_table_2 web, + customer c + WHERE store.customer_sk = web.customer_sk + AND web.customer_sk = c_customer_sk + -- if customer has sales in first year for both store and websales, + -- select him only if web second_year_total/first_year_total + -- ratio is bigger then his store second_year_total/first_year_total ratio. + AND (web.second_year_total / web.first_year_total) > + (store.second_year_total / store.first_year_total) + ORDER BY + web_sales_increase_ratio DESC, + c_customer_sk, + c_first_name, + c_last_name, + c_preferred_cust_flag, + c_birth_country, + c_login + LIMIT {q06_LIMIT} + """ + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) + diff --git a/gpu_bdb/queries/q07/gpu_bdb_query_07.py b/gpu_bdb/queries/q07/gpu_bdb_query_07.py index a14cbcfd..89e8903a 100755 --- a/gpu_bdb/queries/q07/gpu_bdb_query_07.py +++ b/gpu_bdb/queries/q07/gpu_bdb_query_07.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,16 +14,12 @@ # limitations under the License. # -import sys - - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader - +from bdb_tools.q07_utils import read_tables q07_HIGHER_PRICE_RATIO = 1.2 # --store_sales date @@ -51,44 +47,10 @@ def create_high_price_items_df(item_df): return high_price_items_df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_cols = ["i_item_sk", "i_current_price", "i_category"] - store_sales_cols = ["ss_item_sk", "ss_customer_sk", "ss_sold_date_sk"] - store_cols = ["s_store_sk"] - date_cols = ["d_date_sk", "d_year", "d_moy"] - customer_cols = ["c_customer_sk", "c_current_addr_sk"] - customer_address_cols = ["ca_address_sk", "ca_state"] - - item_df = table_reader.read("item", relevant_cols=item_cols) - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - store_df = table_reader.read("store", relevant_cols=store_cols) - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - customer_df = table_reader.read("customer", relevant_cols=customer_cols) - customer_address_df = table_reader.read( - "customer_address", relevant_cols=customer_address_cols - ) - - return ( - item_df, - store_sales_df, - store_df, - date_dim_df, - customer_df, - customer_address_df, - ) - - def main(client, config): ( item_df, store_sales_df, - store_df, date_dim_df, customer_df, customer_address_df, @@ -155,8 +117,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git 
a/gpu_bdb/queries/q07/gpu_bdb_query_07_dask_sql.py b/gpu_bdb/queries/q07/gpu_bdb_query_07_dask_sql.py new file mode 100755 index 00000000..8ff1e73f --- /dev/null +++ b/gpu_bdb/queries/q07/gpu_bdb_query_07_dask_sql.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q07_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + WITH temp_table as + ( + SELECT k.i_item_sk + FROM item k, + ( + SELECT i_category, + SUM(j.i_current_price) / COUNT(j.i_current_price) * 1.2 AS avg_price + FROM item j + GROUP BY j.i_category + ) avgCategoryPrice + WHERE avgCategoryPrice.i_category = k.i_category + AND k.i_current_price > avgCategoryPrice.avg_price + ) + SELECT ca_state, COUNT(*) AS cnt + FROM + customer_address a, + customer c, + store_sales s, + temp_table highPriceItems + WHERE a.ca_address_sk = c.c_current_addr_sk + AND c.c_customer_sk = s.ss_customer_sk + AND ca_state IS NOT NULL + AND ss_item_sk = highPriceItems.i_item_sk + AND s.ss_sold_date_sk IN + ( + SELECT d_date_sk + FROM date_dim + WHERE d_year = 2004 + AND d_moy = 7 + ) + GROUP BY ca_state + HAVING COUNT(*) >= 10 + ORDER BY cnt DESC, ca_state + LIMIT 10 + """ + + result = c.sql(query) + return 
result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q08/gpu_bdb_query_08.py b/gpu_bdb/queries/q08/gpu_bdb_query_08.py index 686ea05b..451cbe9e 100755 --- a/gpu_bdb/queries/q08/gpu_bdb_query_08.py +++ b/gpu_bdb/queries/q08/gpu_bdb_query_08.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,34 +14,33 @@ # limitations under the License. # -import sys import os import glob +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, - convert_datestring_to_days, + convert_datestring_to_days ) -from bdb_tools.readers import build_reader from bdb_tools.merge_util import hash_merge +from bdb_tools.q08_utils import ( + get_sessions, + get_unique_sales_keys_from_sessions, + prep_for_sessionization, + q08_STARTDATE, + q08_ENDDATE, + read_tables +) import numpy as np from distributed import wait -import cupy as cp -import rmm from dask import delayed - -q08_STARTDATE = "2001-09-02" -q08_ENDDATE = "2002-09-02" -q08_SECONDS_BEFORE_PURCHASE = 259200 -NA_FLAG = 0 - - def etl_wcs(wcs_fn, filtered_date_df, web_page_df): - import cudf filtered_date_df = filtered_date_df web_page_df = web_page_df @@ -81,125 +80,6 @@ def etl_wcs(wcs_fn, filtered_date_df, web_page_df): return merged_df[cols_to_keep] -def get_session_id_from_session_boundary(session_change_df, last_session_len): - """ - This function returns session starts given a session change df - """ - import cudf - - user_session_ids = session_change_df.tstamp_inSec - - ### up shift the session length df - session_len = session_change_df["t_index"].diff().reset_index(drop=True) - session_len = 
session_len.shift(-1) - - try: - session_len.iloc[-1] = last_session_len - except (AssertionError, IndexError) as e: # IndexError in numba >= 0.48 - session_len = cudf.Series([]) - - session_id_final_series = ( - cudf.Series(user_session_ids).repeat(session_len).reset_index(drop=True) - ) - return session_id_final_series - - -def get_session_id(df): - """ - This function creates a session id column for each click - The session id grows in incremeant for each user's susbequent session - Session boundry is defined by the time_out - """ - - df["user_change_flag"] = df["wcs_user_sk"].diff(periods=1) != 0 - df["user_change_flag"] = df["user_change_flag"].fillna(True) - df["session_change_flag"] = df["review_flag"] | df["user_change_flag"] - - df = df.reset_index(drop=True) - df["t_index"] = cp.arange(start=0, stop=len(df), dtype=np.int32) - - session_change_df = df[df["session_change_flag"]].reset_index(drop=True) - try: - last_session_len = len(df) - session_change_df["t_index"].iloc[-1] - except (AssertionError, IndexError) as e: # IndexError in numba >= 0.48 - last_session_len = 0 - - session_ids = get_session_id_from_session_boundary( - session_change_df, last_session_len - ) - - assert len(session_ids) == len(df) - return session_ids - - -def get_sessions(df): - df = df.sort_values( - by=["wcs_user_sk", "tstamp_inSec", "wcs_sales_sk", "wp_type_codes"] - ).reset_index(drop=True) - df["session_id"] = get_session_id(df) - return df - - -def get_unique_sales_keys_from_sessions(sessionized, review_cat_code): - sessionized["relevant"] = ( - (sessionized.tstamp_inSec - sessionized.session_id) - <= q08_SECONDS_BEFORE_PURCHASE - ) & (sessionized.wcs_sales_sk != NA_FLAG) - unique_sales_sk = ( - sessionized.query(f"wcs_sales_sk != {NA_FLAG}") - .query("relevant == True") - .query(f"wp_type_codes != {review_cat_code}") - .wcs_sales_sk.unique() - ) - - return unique_sales_sk - - -def prep_for_sessionization(df, review_cat_code): - df = df.fillna(NA_FLAG) - df = df.sort_values( 
- by=["wcs_user_sk", "tstamp_inSec", "wcs_sales_sk", "wp_type_codes"] - ).reset_index(drop=True) - - review_df = df.loc[df["wp_type_codes"] == review_cat_code] - # per user, the index of the first review - # need this to decide if a review was "recent enough" - every_users_first_review = ( - review_df[["wcs_user_sk", "tstamp_inSec"]] - .drop_duplicates() - .reset_index() - .groupby("wcs_user_sk")["index"] - .min() - .reset_index() - ) - every_users_first_review.columns = ["wcs_user_sk", "first_review_index"] - - # then reset the index to keep the old index before parallel join - df_merged = df.reset_index().merge( - every_users_first_review, how="left", on="wcs_user_sk" - ) - df_filtered = df_merged.query("index >= first_review_index") - return df_filtered - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - date_dim_cols = ["d_date_sk", "d_date"] - web_page_cols = ["wp_web_page_sk", "wp_type"] - web_sales_cols = ["ws_net_paid", "ws_order_number", "ws_sold_date_sk"] - - date_dim_df = table_reader.read("date_dim", relevant_cols=date_dim_cols) - web_page_df = table_reader.read("web_page", relevant_cols=web_page_cols) - web_sales_df = table_reader.read("web_sales", relevant_cols=web_sales_cols) - - return (date_dim_df, web_page_df, web_sales_df) - - def reduction_function(df, REVIEW_CAT_CODE): # category code of review records @@ -213,8 +93,6 @@ def reduction_function(df, REVIEW_CAT_CODE): def main(client, config): - import cudf - import dask_cudf (date_dim_df, web_page_df, web_sales_df) = benchmark( read_tables, @@ -327,8 +205,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q08/gpu_bdb_query_08_dask_sql.py 
b/gpu_bdb/queries/q08/gpu_bdb_query_08_dask_sql.py new file mode 100755 index 00000000..6a85bc1c --- /dev/null +++ b/gpu_bdb/queries/q08/gpu_bdb_query_08_dask_sql.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q08_utils import ( + get_sessions, + get_unique_sales_keys_from_sessions, + prep_for_sessionization, + q08_STARTDATE, + q08_ENDDATE, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = f""" + SELECT d_date_sk + FROM date_dim + WHERE CAST(d_date as date) IN (date '{q08_STARTDATE}', + date '{q08_ENDDATE}') + ORDER BY CAST(d_date as date) asc + """ + result_dates_sk_filter = c.sql(query_1).compute() + + # because `result_dates_sk_filter` has repetitive index + result_dates_sk_filter.index = list(range(0, result_dates_sk_filter.shape[0])) + q08_start_dt = result_dates_sk_filter['d_date_sk'][0] + q08_end_dt = result_dates_sk_filter['d_date_sk'][1] + + query_aux = """ + SELECT + wp_web_page_sk, + wp_type + FROM web_page + """ + web_page_df = c.sql(query_aux) + + # cast to minimum viable dtype + web_page_df["wp_type"] = web_page_df["wp_type"].map_partitions( + lambda ser: ser.astype("category") + ) + + 
cpu_categories = web_page_df["wp_type"].compute().cat.categories.to_pandas() + REVIEW_CAT_CODE = cpu_categories.get_loc("review") + + web_page_df["wp_type_codes"] = web_page_df["wp_type"].cat.codes + + web_page_newcols = ["wp_web_page_sk", "wp_type_codes"] + web_page_df = web_page_df[web_page_newcols] + + web_page_df = web_page_df.persist() + wait(web_page_df) + c.create_table('web_page_2', web_page_df, persist=False) + + query_2 = f""" + SELECT + CAST(wcs_user_sk AS INTEGER) AS wcs_user_sk, + (wcs_click_date_sk * 86400 + wcs_click_time_sk) AS tstamp_inSec, + wcs_sales_sk, + wp_type_codes + FROM web_clickstreams + INNER JOIN web_page_2 ON wcs_web_page_sk = wp_web_page_sk + WHERE wcs_user_sk IS NOT NULL + AND wcs_click_date_sk BETWEEN {q08_start_dt} AND {q08_end_dt} + --in the future we want to remove this ORDER BY + DISTRIBUTE BY wcs_user_sk + """ + merged_df = c.sql(query_2) + + c.drop_table("web_page_2") + del web_page_df + + merged_df = merged_df.shuffle(on=["wcs_user_sk"]) + merged_df["review_flag"] = merged_df.wp_type_codes == REVIEW_CAT_CODE + + prepped = merged_df.map_partitions( + prep_for_sessionization, review_cat_code=REVIEW_CAT_CODE + ) + + sessionized = prepped.map_partitions(get_sessions) + + unique_review_sales = sessionized.map_partitions( + get_unique_sales_keys_from_sessions, review_cat_code=REVIEW_CAT_CODE + ) + + unique_review_sales = unique_review_sales.to_frame() + + unique_review_sales = unique_review_sales.persist() + wait(unique_review_sales) + c.create_table("reviews", unique_review_sales, persist=False) + last_query = f""" + SELECT + CAST(review_total AS BIGINT) AS q08_review_sales_amount, + CAST(total - review_total AS BIGINT) AS no_q08_review_sales_amount + FROM + ( + SELECT + SUM(ws_net_paid) AS total, + SUM(CASE when wcs_sales_sk IS NULL THEN 0 ELSE 1 END * ws_net_paid) AS review_total + FROM web_sales + LEFT OUTER JOIN reviews ON ws_order_number = wcs_sales_sk + WHERE ws_sold_date_sk between {q08_start_dt} AND {q08_end_dt} + ) + """ 
+ result = c.sql(last_query) + + c.drop_table("reviews") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q09/gpu_bdb_query_09.py b/gpu_bdb/queries/q09/gpu_bdb_query_09.py index 8c4bc9d8..0d44b61a 100755 --- a/gpu_bdb/queries/q09/gpu_bdb_query_09.py +++ b/gpu_bdb/queries/q09/gpu_bdb_query_09.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,86 +14,44 @@ # limitations under the License. # -from dask.distributed import Client -import sys +import cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - ss_columns = [ - "ss_quantity", - "ss_sold_date_sk", - "ss_addr_sk", - "ss_store_sk", - "ss_cdemo_sk", - "ss_sales_price", - "ss_net_profit", - ] - - store_sales = table_reader.read("store_sales", relevant_cols=ss_columns) - - ca_columns = ["ca_address_sk", "ca_country", "ca_state"] - customer_address = table_reader.read("customer_address", relevant_cols=ca_columns) - - cd_columns = ["cd_demo_sk", "cd_marital_status", "cd_education_status"] - customer_demographics = table_reader.read( - "customer_demographics", relevant_cols=cd_columns - ) - - dd_columns = ["d_year", "d_date_sk"] - date_dim = table_reader.read("date_dim", relevant_cols=dd_columns) - - s_columns = ["s_store_sk"] - store = table_reader.read("store", relevant_cols=s_columns) - - return store_sales, customer_address, customer_demographics, date_dim, store +from 
bdb_tools.q09_utils import ( + q09_year, + q09_part1_ca_country, + q09_part1_ca_state_IN, + q09_part1_net_profit_min, + q09_part1_net_profit_max, + q09_part1_education_status, + q09_part1_marital_status, + q09_part1_sales_price_min, + q09_part1_sales_price_max, + q09_part2_ca_country, + q09_part2_ca_state_IN, + q09_part2_net_profit_min, + q09_part2_net_profit_max, + q09_part2_education_status, + q09_part2_marital_status, + q09_part2_sales_price_min, + q09_part2_sales_price_max, + q09_part3_ca_country, + q09_part3_ca_state_IN, + q09_part3_net_profit_min, + q09_part3_net_profit_max, + q09_part3_education_status, + q09_part3_marital_status, + q09_part3_sales_price_min, + q09_part3_sales_price_max, + read_tables +) def main(client, config): - import cudf - - # Conf variables - - q09_year = 2001 - - q09_part1_ca_country = "United States" - q09_part1_ca_state_IN = "KY", "GA", "NM" - q09_part1_net_profit_min = 0 - q09_part1_net_profit_max = 2000 - q09_part1_education_status = "4 yr Degree" - q09_part1_marital_status = "M" - q09_part1_sales_price_min = 100 - q09_part1_sales_price_max = 150 - - q09_part2_ca_country = "United States" - q09_part2_ca_state_IN = "MT", "OR", "IN" - q09_part2_net_profit_min = 150 - q09_part2_net_profit_max = 3000 - q09_part2_education_status = "4 yr Degree" - q09_part2_marital_status = "M" - q09_part2_sales_price_min = 50 - q09_part2_sales_price_max = 200 - - q09_part3_ca_country = "United States" - q09_part3_ca_state_IN = "WI", "MO", "WV" - q09_part3_net_profit_min = 50 - q09_part3_net_profit_max = 25000 - q09_part3_education_status = "4 yr Degree" - q09_part3_marital_status = "M" - q09_part3_sales_price_min = 150 - q09_part3_sales_price_max = 200 ( store_sales, @@ -208,8 +166,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git 
a/gpu_bdb/queries/q09/gpu_bdb_query_09_dask_sql.py b/gpu_bdb/queries/q09/gpu_bdb_query_09_dask_sql.py new file mode 100755 index 00000000..16e71c7c --- /dev/null +++ b/gpu_bdb/queries/q09/gpu_bdb_query_09_dask_sql.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q09_utils import ( + q09_year, + q09_part1_ca_country, + q09_part1_ca_state_IN, + q09_part1_net_profit_min, + q09_part1_net_profit_max, + q09_part1_education_status, + q09_part1_marital_status, + q09_part1_sales_price_min, + q09_part1_sales_price_max, + q09_part2_ca_country, + q09_part2_ca_state_IN, + q09_part2_net_profit_min, + q09_part2_net_profit_max, + q09_part2_education_status, + q09_part2_marital_status, + q09_part2_sales_price_min, + q09_part2_sales_price_max, + q09_part3_ca_country, + q09_part3_ca_state_IN, + q09_part3_net_profit_min, + q09_part3_net_profit_max, + q09_part3_education_status, + q09_part3_marital_status, + q09_part3_sales_price_min, + q09_part3_sales_price_max, + read_tables +) + + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + SELECT SUM(ss1.ss_quantity) + FROM store_sales ss1, + date_dim dd,customer_address ca1, + store s, + customer_demographics cd + -- select date range + 
WHERE ss1.ss_sold_date_sk = dd.d_date_sk + AND dd.d_year = {q09_year} + AND ss1.ss_addr_sk = ca1.ca_address_sk + AND s.s_store_sk = ss1.ss_store_sk + AND cd.cd_demo_sk = ss1.ss_cdemo_sk + AND + ( + ( + cd.cd_marital_status = '{q09_part1_marital_status}' + AND cd.cd_education_status = '{q09_part1_education_status}' + AND {q09_part1_sales_price_min} <= ss1.ss_sales_price + AND ss1.ss_sales_price <= {q09_part1_sales_price_max} + ) + OR + ( + cd.cd_marital_status = '{q09_part2_marital_status}' + AND cd.cd_education_status = '{q09_part2_education_status}' + AND {q09_part2_sales_price_min} <= ss1.ss_sales_price + AND ss1.ss_sales_price <= {q09_part2_sales_price_max} + ) + OR + ( + cd.cd_marital_status = '{q09_part3_marital_status}' + AND cd.cd_education_status = '{q09_part3_education_status}' + AND {q09_part3_sales_price_min} <= ss1.ss_sales_price + AND ss1.ss_sales_price <= {q09_part3_sales_price_max} + ) + ) + AND + ( + ( + ca1.ca_country = '{q09_part1_ca_country}' + AND ca1.ca_state IN {q09_part1_ca_state_IN} + AND {q09_part1_net_profit_min} <= ss1.ss_net_profit + AND ss1.ss_net_profit <= {q09_part1_net_profit_max} + ) + OR + ( + ca1.ca_country = '{q09_part2_ca_country}' + AND ca1.ca_state IN {q09_part2_ca_state_IN} + AND {q09_part2_net_profit_min} <= ss1.ss_net_profit + AND ss1.ss_net_profit <= {q09_part2_net_profit_max} + ) + OR + ( + ca1.ca_country = '{q09_part3_ca_country}' + AND ca1.ca_state IN {q09_part3_ca_state_IN} + AND {q09_part3_net_profit_min} <= ss1.ss_net_profit + AND ss1.ss_net_profit <= {q09_part3_net_profit_max} + ) + ) + """ + result = c.sql(query) + result.columns = ["sum(ss_quantity)"] + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q10/gpu_bdb_query_10.py b/gpu_bdb/queries/q10/gpu_bdb_query_10.py index cb24ef88..4cc3e833 100755 --- 
a/gpu_bdb/queries/q10/gpu_bdb_query_10.py +++ b/gpu_bdb/queries/q10/gpu_bdb_query_10.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,47 +14,25 @@ # limitations under the License. # -import sys import os +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) from bdb_tools.text import create_sentences_from_reviews, create_words_from_sentences +from bdb_tools.q10_utils import ( + eol_char, + read_tables +) - -import rmm -import cupy as cp -import distributed - -from bdb_tools.readers import build_reader -from dask.distributed import Client, wait - - -# -------- Q10 ----------- -eol_char = "è" - - -def read_tables(config): - - ### splitting by row groups for better parallelism - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=True, - ) - product_reviews_cols = ["pr_item_sk", "pr_review_content", "pr_review_sk"] - - product_reviews_df = table_reader.read( - "product_reviews", relevant_cols=product_reviews_cols, - ) - return product_reviews_df - +from dask.distributed import wait def load_sentiment_words(filename, sentiment): - import cudf with open(filename) as fh: sentiment_words = list(map(str.strip, fh.readlines())) @@ -67,8 +45,6 @@ def load_sentiment_words(filename, sentiment): def main(client, config): - import cudf - import dask_cudf product_reviews_df = benchmark( read_tables, @@ -150,8 +126,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q10/gpu_bdb_query_10_dask_sql.py b/gpu_bdb/queries/q10/gpu_bdb_query_10_dask_sql.py new file mode 100755 index 
00000000..64dba763 --- /dev/null +++ b/gpu_bdb/queries/q10/gpu_bdb_query_10_dask_sql.py @@ -0,0 +1,146 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +import dask_cudf + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.text import ( + create_sentences_from_reviews, + create_words_from_sentences +) + +from bdb_tools.q10_utils import ( + eol_char, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = """ + SELECT pr_item_sk, + pr_review_content, + pr_review_sk + FROM product_reviews + where pr_review_content IS NOT NULL + ORDER BY pr_item_sk, pr_review_content, pr_review_sk + """ + product_reviews_df = c.sql(query_1) + + product_reviews_df[ + "pr_review_content" + ] = product_reviews_df.pr_review_content.str.lower() + product_reviews_df[ + "pr_review_content" + ] = product_reviews_df.pr_review_content.str.replace( + [".", "?", "!"], [eol_char], regex=False + ) + + sentences = product_reviews_df.map_partitions(create_sentences_from_reviews) + + product_reviews_df = product_reviews_df[["pr_item_sk", "pr_review_sk"]] + product_reviews_df["pr_review_sk"] = product_reviews_df["pr_review_sk"].astype("int32") + + # need the global position in the sentence tokenized df + 
sentences["x"] = 1 + sentences["sentence_tokenized_global_pos"] = sentences.x.cumsum() + del sentences["x"] + + word_df = sentences.map_partitions( + create_words_from_sentences, + global_position_column="sentence_tokenized_global_pos", + ) + + product_reviews_df = product_reviews_df.persist() + wait(product_reviews_df) + c.create_table('product_reviews_df', product_reviews_df, persist=False) + + sentences = sentences.persist() + wait(sentences) + c.create_table('sentences', sentences, persist=False) + + # These files come from the official TPCx-BB kit + # We extracted them from bigbenchqueriesmr.jar + # Need to pass the absolute path for these txt files + sentiment_dir = os.path.join(config["data_dir"], "sentiment_files") + ns_df = dask_cudf.read_csv(os.path.join(sentiment_dir, "negativeSentiment.txt"), names=["sentiment_word"], persist=False) + c.create_table('negative_sentiment', ns_df, persist=False) + ps_df = dask_cudf.read_csv(os.path.join(sentiment_dir, "positiveSentiment.txt"), names=["sentiment_word"], persist=False) + c.create_table('positive_sentiment', ps_df, persist=False) + + word_df = word_df.persist() + wait(word_df) + c.create_table('word_df', word_df, persist=False) + + query = ''' + SELECT pr_item_sk as item_sk, + sentence as review_sentence, + sentiment, + sentiment_word FROM + ( + SELECT review_idx_global_pos, + sentiment_word, + sentiment, + sentence FROM + ( + WITH sent_df AS + ( + (SELECT sentiment_word, 'POS' as sentiment + FROM positive_sentiment + GROUP BY sentiment_word) + UNION ALL + (SELECT sentiment_word, 'NEG' as sentiment + FROM negative_sentiment + GROUP BY sentiment_word) + ) + SELECT * FROM word_df + INNER JOIN sent_df + ON word_df.word = sent_df.sentiment_word + ) word_sentence_sentiment + LEFT JOIN sentences + ON word_sentence_sentiment.sentence_idx_global_pos = sentences.sentence_tokenized_global_pos + ) temp + INNER JOIN product_reviews_df + ON temp.review_idx_global_pos = product_reviews_df.pr_review_sk + ORDER BY item_sk, 
review_sentence, sentiment, sentiment_word + ''' + result = c.sql(query) + + c.drop_table("product_reviews_df") + del product_reviews_df + c.drop_table("sentences") + del sentences + c.drop_table("word_df") + del word_df + + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q11/gpu_bdb_query_11.py b/gpu_bdb/queries/q11/gpu_bdb_query_11.py index 6ff0b5b3..224daf40 100755 --- a/gpu_bdb/queries/q11/gpu_bdb_query_11.py +++ b/gpu_bdb/queries/q11/gpu_bdb_query_11.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ # limitations under the License. # -import sys - +import cudf from bdb_tools.utils import ( benchmark, @@ -23,46 +22,15 @@ run_query, convert_datestring_to_days, ) -from bdb_tools.readers import build_reader -from numba import cuda -import numpy as np +from bdb_tools.q11_utils import read_tables +import numpy as np q11_start_date = "2003-01-02" q11_end_date = "2003-02-02" - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - product_review_cols = [ - "pr_review_rating", - "pr_item_sk", - ] - web_sales_cols = [ - "ws_sold_date_sk", - "ws_net_paid", - "ws_item_sk", - ] - date_cols = ["d_date_sk", "d_date"] - - pr_df = table_reader.read("product_reviews", relevant_cols=product_review_cols) - # we only read int columns here so it should scale up to sf-10k as just 26M rows - pr_df = pr_df.repartition(npartitions=1) - - ws_df = table_reader.read("web_sales", relevant_cols=web_sales_cols) - date_df = table_reader.read("date_dim", 
relevant_cols=date_cols) - - return pr_df, ws_df, date_df - - def main(client, config): - import cudf pr_df, ws_df, date_df = benchmark( read_tables, @@ -125,8 +93,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q11/gpu_bdb_query_11_dask_sql.py b/gpu_bdb/queries/q11/gpu_bdb_query_11_dask_sql.py new file mode 100755 index 00000000..b5d41715 --- /dev/null +++ b/gpu_bdb/queries/q11/gpu_bdb_query_11_dask_sql.py @@ -0,0 +1,67 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster +import cudf + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q11_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + WITH p AS + ( + SELECT + pr_item_sk, + count(pr_item_sk) AS r_count, + AVG( CAST(pr_review_rating AS DOUBLE) ) avg_rating + FROM product_reviews + WHERE pr_item_sk IS NOT NULL + GROUP BY pr_item_sk + ), s AS + ( + SELECT + ws_item_sk + FROM web_sales ws + INNER JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE ws_item_sk IS NOT null + AND CAST(d.d_date AS DATE) >= DATE '2003-01-02' + AND CAST(d.d_date AS DATE) <= DATE '2003-02-02' + GROUP BY ws_item_sk + ) + SELECT p.r_count AS x, + p.avg_rating AS y + FROM s INNER JOIN p ON p.pr_item_sk = s.ws_item_sk + """ + + result = c.sql(query) + sales_corr = result["x"].corr(result["y"]).compute() + result_df = cudf.DataFrame([sales_corr]) + result_df.columns = ["corr(CAST(reviews_count AS DOUBLE), avg_rating)"] + return result_df + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q12/gpu_bdb_query_12.py b/gpu_bdb/queries/q12/gpu_bdb_query_12.py index e912c6f3..39d05a42 100755 --- a/gpu_bdb/queries/q12/gpu_bdb_query_12.py +++ b/gpu_bdb/queries/q12/gpu_bdb_query_12.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,16 +14,18 @@ # limitations under the License. 
# -import sys import os import glob +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader +from bdb_tools.q12_utils import read_tables from distributed import wait import numpy as np @@ -36,17 +38,11 @@ ### These parameters are not used -# q12_startDate='2001-09-02' -# q12_endDate1='2001-10-02' -# q12_endDate2='2001-12-02' q12_i_category_IN = ["Books", "Electronics"] ### below was hard coded in the orignal query q12_store_sale_sk_start_date = 37134 -item_cols = ["i_item_sk", "i_category"] -store_sales_cols = ["ss_item_sk", "ss_sold_date_sk", "ss_customer_sk"] - ### Util Functions def string_filter(df, col_name, col_values): """ @@ -63,19 +59,6 @@ def string_filter(df, col_name, col_values): return df[bool_flag].reset_index(drop=True) -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_df = table_reader.read("item", relevant_cols=item_cols) - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - - return item_df, store_sales_df - - def filter_wcs_table(web_clickstreams_fn, filtered_item_df): """ Filter web clickstreams table @@ -90,7 +73,6 @@ def filter_wcs_table(web_clickstreams_fn, filtered_item_df): ## AND wcs_user_sk IS NOT NULL ### AND wcs_sales_sk IS NULL --only views, not purchases """ - import cudf web_clickstreams_cols = [ "wcs_user_sk", @@ -150,7 +132,6 @@ def filter_ss_table(store_sales_df, filtered_item_df): def main(client, config): - import cudf, dask_cudf item_df, store_sales_df = benchmark( read_tables, @@ -242,8 +223,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q12/gpu_bdb_query_12_dask_sql.py 
b/gpu_bdb/queries/q12/gpu_bdb_query_12_dask_sql.py new file mode 100755 index 00000000..2656553a --- /dev/null +++ b/gpu_bdb/queries/q12/gpu_bdb_query_12_dask_sql.py @@ -0,0 +1,67 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q12_utils import read_tables + +q12_i_category_IN = "'Books', 'Electronics'" + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + SELECT DISTINCT wcs_user_sk + FROM + ( + SELECT DISTINCT + wcs_user_sk, + wcs_click_date_sk + FROM web_clickstreams, item + WHERE wcs_click_date_sk BETWEEN 37134 AND 37164 + AND i_category IN ({q12_i_category_IN}) + AND wcs_item_sk = i_item_sk + AND wcs_user_sk IS NOT NULL + AND wcs_sales_sk IS NULL + ) webInRange, + ( + SELECT DISTINCT + ss_customer_sk, + ss_sold_date_sk + FROM store_sales, item + WHERE ss_sold_date_sk BETWEEN 37134 AND 37224 + AND i_category IN ({q12_i_category_IN}) -- filter given category + AND ss_item_sk = i_item_sk + AND ss_customer_sk IS NOT NULL + ) storeInRange + WHERE wcs_user_sk = ss_customer_sk + AND wcs_click_date_sk < ss_sold_date_sk + ORDER BY wcs_user_sk + """ + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = 
attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q13/gpu_bdb_query_13.py b/gpu_bdb/queries/q13/gpu_bdb_query_13.py index a61dbfec..79afefe9 100755 --- a/gpu_bdb/queries/q13/gpu_bdb_query_13.py +++ b/gpu_bdb/queries/q13/gpu_bdb_query_13.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +14,13 @@ # limitations under the License. # -import sys - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader +from bdb_tools.q13_utils import read_tables + from distributed import wait @@ -46,28 +45,6 @@ def get_sales_ratio(df): return df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - date_cols = ["d_date_sk", "d_year"] - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - - customer_cols = ["c_customer_sk", "c_customer_id", "c_first_name", "c_last_name"] - customer_df = table_reader.read("customer", relevant_cols=customer_cols) - - s_sales_cols = ["ss_sold_date_sk", "ss_customer_sk", "ss_net_paid"] - s_sales_df = table_reader.read("store_sales", relevant_cols=s_sales_cols) - - w_sales_cols = ["ws_sold_date_sk", "ws_bill_customer_sk", "ws_net_paid"] - web_sales_df = table_reader.read("web_sales", relevant_cols=w_sales_cols) - - return date_dim_df, customer_df, s_sales_df, web_sales_df - - def main(client, config): date_dim_df, customer_df, s_sales_df, web_sales_df = benchmark( read_tables, @@ -212,8 +189,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = 
gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q13/gpu_bdb_query_13_dask_sql.py b/gpu_bdb/queries/q13/gpu_bdb_query_13_dask_sql.py new file mode 100644 index 00000000..19c501f9 --- /dev/null +++ b/gpu_bdb/queries/q13/gpu_bdb_query_13_dask_sql.py @@ -0,0 +1,101 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q13_utils import read_tables + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = """ + SELECT + ss.ss_customer_sk AS customer_sk, + sum( case when (d_year = 2001) THEN ss_net_paid ELSE 0.0 END) first_year_total, + sum( case when (d_year = 2002) THEN ss_net_paid ELSE 0.0 END) second_year_total + FROM store_sales ss + JOIN + ( + SELECT d_date_sk, d_year + FROM date_dim d + WHERE d.d_year in (2001, 2002) + ) dd on ( ss.ss_sold_date_sk = dd.d_date_sk ) + GROUP BY ss.ss_customer_sk + HAVING sum( case when (d_year = 2001) THEN ss_net_paid ELSE 0.0 END) > 0.0 + """ + temp_table1 = c.sql(query_1) + + temp_table1 = temp_table1.persist() + wait(temp_table1) + c.create_table("temp_table1", temp_table1, persist=False) + query_2 = """ + SELECT + ws.ws_bill_customer_sk AS customer_sk, + sum( case when (d_year = 2001) 
THEN ws_net_paid ELSE 0.0 END) first_year_total, + sum( case when (d_year = 2002) THEN ws_net_paid ELSE 0.0 END) second_year_total + FROM web_sales ws + JOIN + ( + SELECT d_date_sk, d_year + FROM date_dim d + WHERE d.d_year in (2001, 2002) + ) dd ON ( ws.ws_sold_date_sk = dd.d_date_sk ) + GROUP BY ws.ws_bill_customer_sk + HAVING sum( case when (d_year = 2001) THEN ws_net_paid ELSE 0.0 END) > 0.0 + """ + temp_table2 = c.sql(query_2) + + temp_table2 = temp_table2.persist() + wait(temp_table2) + c.create_table("temp_table2", temp_table2, persist=False) + query = """ + SELECT + CAST(c_customer_sk AS BIGINT) as c_customer_sk, + c_first_name, + c_last_name, + (store.second_year_total / store.first_year_total) AS storeSalesIncreaseRatio, + (web.second_year_total / web.first_year_total) AS webSalesIncreaseRatio + FROM temp_table1 store, + temp_table2 web, + customer c + WHERE store.customer_sk = web.customer_sk + AND web.customer_sk = c_customer_sk + AND (web.second_year_total / web.first_year_total) > (store.second_year_total / store.first_year_total) + ORDER BY webSalesIncreaseRatio DESC, + c_customer_sk, + c_first_name, + c_last_name + LIMIT 100 + """ + result = c.sql(query) + + c.drop_table("temp_table1") + c.drop_table("temp_table2") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q14/gpu_bdb_query_14.py b/gpu_bdb/queries/q14/gpu_bdb_query_14.py index 52cbc09f..2549910b 100755 --- a/gpu_bdb/queries/q14/gpu_bdb_query_14.py +++ b/gpu_bdb/queries/q14/gpu_bdb_query_14.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,46 +14,18 @@ # limitations under the License. 
# -from dask.distributed import Client - import numpy as np -import sys +import cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - ws_columns = ["ws_ship_hdemo_sk", "ws_web_page_sk", "ws_sold_time_sk"] - web_sales = table_reader.read("web_sales", relevant_cols=ws_columns) - - hd_columns = ["hd_demo_sk", "hd_dep_count"] - household_demographics = table_reader.read( - "household_demographics", relevant_cols=hd_columns - ) - - wp_columns = ["wp_web_page_sk", "wp_char_count"] - web_page = table_reader.read("web_page", relevant_cols=wp_columns) - - td_columns = ["t_time_sk", "t_hour"] - time_dim = table_reader.read("time_dim", relevant_cols=td_columns) - - return web_sales, household_demographics, web_page, time_dim - +from bdb_tools.q14_utils import read_tables def main(client, config): - import cudf q14_dependents = 5 q14_morning_startHour = 7 @@ -140,8 +112,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q14/gpu_bdb_query_14_dask_sql.py b/gpu_bdb/queries/q14/gpu_bdb_query_14_dask_sql.py new file mode 100755 index 00000000..ca6850f2 --- /dev/null +++ b/gpu_bdb/queries/q14/gpu_bdb_query_14_dask_sql.py @@ -0,0 +1,56 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q14_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + SELECT CASE WHEN pmc > 0.0 THEN CAST (amc AS DOUBLE) / CAST (pmc AS DOUBLE) ELSE -1.0 END AS am_pm_ratio + FROM + ( + SELECT SUM(amc1) AS amc, SUM(pmc1) AS pmc + FROM + ( + SELECT + CASE WHEN t_hour BETWEEN 7 AND 8 THEN COUNT(1) ELSE 0 END AS amc1, + CASE WHEN t_hour BETWEEN 19 AND 20 THEN COUNT(1) ELSE 0 END AS pmc1 + FROM web_sales ws + JOIN household_demographics hd ON (hd.hd_demo_sk = ws.ws_ship_hdemo_sk and hd.hd_dep_count = 5) + JOIN web_page wp ON (wp.wp_web_page_sk = ws.ws_web_page_sk and wp.wp_char_count BETWEEN 5000 AND 6000) + JOIN time_dim td ON (td.t_time_sk = ws.ws_sold_time_sk and td.t_hour IN (7,8,19,20)) + GROUP BY t_hour + ) cnt_am_pm + ) sum_am_pm + """ + + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q15/gpu_bdb_query_15.py b/gpu_bdb/queries/q15/gpu_bdb_query_15.py index 7699d087..e8e24cd4 100755 --- a/gpu_bdb/queries/q15/gpu_bdb_query_15.py +++ b/gpu_bdb/queries/q15/gpu_bdb_query_15.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,44 +14,21 @@ # limitations under the License. # -import sys -from collections import OrderedDict - - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, convert_datestring_to_days, ) -from bdb_tools.readers import build_reader +from bdb_tools.q15_utils import ( + q15_startDate, + q15_endDate, + q15_store_sk, + store_sales_cols, + read_tables +) import datetime -import numpy as np - - -q15_startDate = "2001-09-02" -q15_endDate = "2002-09-02" -q15_store_sk = "10" - -store_sales_cols = ["ss_sold_date_sk", "ss_net_paid", "ss_store_sk", "ss_item_sk"] -date_cols = ["d_date", "d_date_sk"] -item_cols = ["i_item_sk", "i_category_id"] - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - item_df = table_reader.read("item", relevant_cols=item_cols) - - return store_sales_df, date_dim_df, item_df - def main(client, config): @@ -166,8 +143,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q15/gpu_bdb_query_15_dask_sql.py b/gpu_bdb/queries/q15/gpu_bdb_query_15_dask_sql.py new file mode 100755 index 00000000..c04ea8c2 --- /dev/null +++ b/gpu_bdb/queries/q15/gpu_bdb_query_15_dask_sql.py @@ -0,0 +1,72 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q15_utils import ( + q15_startDate, + q15_endDate, + q15_store_sk, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + SELECT * + FROM + ( + SELECT + cat, + ( (count(x) * SUM(xy) - SUM(x) * SUM(y)) / (count(x) * SUM(xx) - SUM(x) * SUM(x)) ) AS slope, + (SUM(y) - ((count(x) * SUM(xy) - SUM(x) * SUM(y)) / (count(x) * SUM(xx) - SUM(x)*SUM(x)) ) * SUM(x)) / count(x) AS intercept + FROM + ( + SELECT + i.i_category_id AS cat, + s.ss_sold_date_sk AS x, + CAST(SUM(s.ss_net_paid) AS DOUBLE) AS y, + CAST(s.ss_sold_date_sk * SUM(s.ss_net_paid) AS DOUBLE) AS xy, + CAST(s.ss_sold_date_sk * s.ss_sold_date_sk AS DOUBLE) AS xx + FROM store_sales s + INNER JOIN item i ON s.ss_item_sk = i.i_item_sk + INNER JOIN date_dim d ON s.ss_sold_date_sk = d.d_date_sk + WHERE s.ss_store_sk = {q15_store_sk} + AND i.i_category_id IS NOT NULL + AND CAST(d.d_date AS DATE) >= DATE '{q15_startDate}' + AND CAST(d.d_date AS DATE) <= DATE '{q15_endDate}' + GROUP BY i.i_category_id, s.ss_sold_date_sk + ) temp + GROUP BY cat + ) regression + WHERE slope <= 0.0 + ORDER BY cat + """ + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git 
a/gpu_bdb/queries/q16/gpu_bdb_query_16.py b/gpu_bdb/queries/q16/gpu_bdb_query_16.py index e093427d..7f2747b4 100755 --- a/gpu_bdb/queries/q16/gpu_bdb_query_16.py +++ b/gpu_bdb/queries/q16/gpu_bdb_query_16.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ # limitations under the License. # -import sys - +import cudf from bdb_tools.utils import ( benchmark, @@ -24,7 +23,8 @@ convert_datestring_to_days, ) from bdb_tools.merge_util import hash_merge -from bdb_tools.readers import build_reader +from bdb_tools.q16_utils import read_tables + from dask.distributed import wait import numpy as np @@ -33,19 +33,6 @@ ### conf q16_date = "2001-03-16" -websale_cols = [ - "ws_order_number", - "ws_item_sk", - "ws_warehouse_sk", - "ws_sold_date_sk", - "ws_sales_price", -] -web_returns_cols = ["wr_order_number", "wr_item_sk", "wr_refunded_cash"] -date_cols = ["d_date", "d_date_sk"] -item_cols = ["i_item_sk", "i_item_id"] -warehouse_cols = ["w_warehouse_sk", "w_state"] - - # INSERT INTO TABLE ${hiveconf:RESULT_TABLE} # SELECT w_state, i_item_id, # SUM( @@ -72,23 +59,7 @@ def get_before_after_sales(df, q16_timestamp): return df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - web_sales_df = table_reader.read("web_sales", relevant_cols=websale_cols) - web_returns_df = table_reader.read("web_returns", relevant_cols=web_returns_cols) - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - item_df = table_reader.read("item", relevant_cols=item_cols) - warehouse_df = table_reader.read("warehouse", relevant_cols=warehouse_cols) - return web_sales_df, web_returns_df, date_dim_df, item_df, warehouse_df - - def main(client, config): 
- import cudf web_sales_df, web_returns_df, date_dim_df, item_df, warehouse_df = benchmark( read_tables, @@ -264,8 +235,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q16/gpu_bdb_query_16_dask_sql.py b/gpu_bdb/queries/q16/gpu_bdb_query_16_dask_sql.py new file mode 100755 index 00000000..8ddb145e --- /dev/null +++ b/gpu_bdb/queries/q16/gpu_bdb_query_16_dask_sql.py @@ -0,0 +1,93 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +import datetime +from datetime import timedelta +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q16_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + date = datetime.datetime(2001, 3, 16) + start = (date + timedelta(days=-30)).strftime("%Y-%m-%d") + end = (date + timedelta(days=30)).strftime("%Y-%m-%d") + mid = date.strftime("%Y-%m-%d") + + date_query = f""" + SELECT d_date_sk + FROM date_dim + WHERE CAST(d_date as DATE) IN (DATE '{start}', DATE '{mid}', DATE '{end}') + ORDER BY CAST(d_date as date) ASC + """ + + dates = c.sql(date_query) + + cpu_dates = dates["d_date_sk"].compute().to_pandas() + cpu_dates.index = list(range(0, cpu_dates.shape[0])) + + last_query = f""" + SELECT w_state, i_item_id, + SUM + ( + CASE WHEN ws_sold_date_sk < {str(cpu_dates[1])} + THEN ws_sales_price - COALESCE(wr_refunded_cash,0) + ELSE 0.0 END + ) AS sales_before, + SUM + ( + CASE WHEN ws_sold_date_sk >= {str(cpu_dates[1])} + THEN ws_sales_price - COALESCE(wr_refunded_cash,0) + ELSE 0.0 END + ) AS sales_after + FROM + ( + SELECT ws_item_sk, + ws_warehouse_sk, + ws_sold_date_sk, + ws_sales_price, + wr_refunded_cash + FROM web_sales ws + LEFT OUTER JOIN web_returns wr ON + ( + ws.ws_order_number = wr.wr_order_number + AND ws.ws_item_sk = wr.wr_item_sk + ) + WHERE ws_sold_date_sk BETWEEN {str(cpu_dates[0])} + AND {str(cpu_dates[2])} + ) a1 + JOIN item i ON a1.ws_item_sk = i.i_item_sk + JOIN warehouse w ON a1.ws_warehouse_sk = w.w_warehouse_sk + GROUP BY w_state,i_item_id + ORDER BY w_state,i_item_id + LIMIT 100 + """ + + result = c.sql(last_query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git 
a/gpu_bdb/queries/q17/gpu_bdb_query_17.py b/gpu_bdb/queries/q17/gpu_bdb_query_17.py index 8f36a11e..c7113e48 100755 --- a/gpu_bdb/queries/q17/gpu_bdb_query_17.py +++ b/gpu_bdb/queries/q17/gpu_bdb_query_17.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,72 +14,25 @@ # limitations under the License. # -import sys -from collections import OrderedDict +import cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, left_semi_join, run_query, + ) +from bdb_tools.q17_utils import ( + q17_gmt_offset, + q17_year, + q17_month, + store_sales_cols, + read_tables ) -from bdb_tools.readers import build_reader - -### conf -q17_gmt_offset = -5 -# --store_sales date -q17_year = 2001 -q17_month = 12 q17_i_category_IN = "Books", "Music" - -store_sales_cols = [ - "ss_ext_sales_price", - "ss_sold_date_sk", - "ss_store_sk", - "ss_customer_sk", - "ss_promo_sk", - "ss_item_sk", -] -item_cols = ["i_category", "i_item_sk"] -customer_cols = ["c_customer_sk", "c_current_addr_sk"] -store_cols = ["s_gmt_offset", "s_store_sk"] -date_cols = ["d_date_sk", "d_year", "d_moy"] -customer_address_cols = ["ca_address_sk", "ca_gmt_offset"] -promotion_cols = ["p_channel_email", "p_channel_dmail", "p_channel_tv", "p_promo_sk"] - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - item_df = table_reader.read("item", relevant_cols=item_cols) - customer_df = table_reader.read("customer", relevant_cols=customer_cols) - store_df = table_reader.read("store", relevant_cols=store_cols) - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - customer_address_df = 
table_reader.read( - "customer_address", relevant_cols=customer_address_cols - ) - promotion_df = table_reader.read("promotion", relevant_cols=promotion_cols) - - return ( - store_sales_df, - item_df, - customer_df, - store_df, - date_dim_df, - customer_address_df, - promotion_df, - ) - - def main(client, config): - import cudf ( store_sales_df, @@ -214,8 +167,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q17/gpu_bdb_query_17_dask_sql.py b/gpu_bdb/queries/q17/gpu_bdb_query_17_dask_sql.py new file mode 100755 index 00000000..d65181e7 --- /dev/null +++ b/gpu_bdb/queries/q17/gpu_bdb_query_17_dask_sql.py @@ -0,0 +1,85 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q17_utils import ( + q17_gmt_offset, + q17_year, + q17_month, + read_tables +) + +q17_i_category_IN = "'Books', 'Music'" + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_date = f""" + select min(d_date_sk) as min_d_date_sk, + max(d_date_sk) as max_d_date_sk + from date_dim + where d_year = {q17_year} + and d_moy = {q17_month} + """ + dates_result = c.sql(query_date).compute() + + min_date_sk_val = dates_result["min_d_date_sk"][0] + max_date_sk_val = dates_result["max_d_date_sk"][0] + + query = f""" + SELECT sum(promotional) as promotional, + sum(total) as total, + CASE WHEN sum(total) > 0.0 THEN (100.0 * sum(promotional)) / sum(total) + ELSE 0.0 END as promo_percent + FROM + ( + SELECT p_channel_email, + p_channel_dmail, + p_channel_tv, + SUM( CAST(ss_ext_sales_price AS DOUBLE) ) total, + CASE WHEN (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') + THEN SUM(CAST(ss_ext_sales_price AS DOUBLE)) ELSE 0 END as promotional + FROM store_sales ss + INNER JOIN promotion p ON ss.ss_promo_sk = p.p_promo_sk + inner join item i on ss.ss_item_sk = i.i_item_sk + inner join store s on ss.ss_store_sk = s.s_store_sk + inner join customer c on c.c_customer_sk = ss.ss_customer_sk + inner join customer_address ca + on c.c_current_addr_sk = ca.ca_address_sk + WHERE i.i_category IN ({q17_i_category_IN}) + AND s.s_gmt_offset = {q17_gmt_offset} + AND ca.ca_gmt_offset = {q17_gmt_offset} + AND ss.ss_sold_date_sk >= {min_date_sk_val} + AND ss.ss_sold_date_sk <= {max_date_sk_val} + GROUP BY p_channel_email, p_channel_dmail, p_channel_tv + ) sum_promotional + -- we don't need a 'ON' join condition. result is just two numbers. 
+ """ + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q18/gpu_bdb_query_18.py b/gpu_bdb/queries/q18/gpu_bdb_query_18.py index 899d1c86..f8c260a1 100755 --- a/gpu_bdb/queries/q18/gpu_bdb_query_18.py +++ b/gpu_bdb/queries/q18/gpu_bdb_query_18.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ # limitations under the License. # -import sys import os -from collections import OrderedDict +import cudf +import dask_cudf from bdb_tools.utils import ( benchmark, @@ -25,136 +25,24 @@ left_semi_join, run_query, ) - -from bdb_tools.readers import build_reader from bdb_tools.text import ( create_sentences_from_reviews, create_words_from_sentences, ) +from bdb_tools.q18_utils import ( + find_relevant_reviews, + q18_startDate, + q18_endDate, + EOL_CHAR, + read_tables +) + import numpy as np -import cupy as cp from distributed import wait - -# -------- Q18 ----------- -# -- store_sales date range -q18_startDate = "2001-05-02" -# --+90days -q18_endDate = "2001-09-02" TEMP_TABLE1 = "TEMP_TABLE1" -EOL_CHAR = "è" - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], basepath=config["data_dir"], - ) - - store_sales_cols = [ - "ss_store_sk", - "ss_sold_date_sk", - "ss_net_paid", - ] - date_cols = ["d_date_sk", "d_date"] - store_cols = ["s_store_sk", "s_store_name"] - - store_sales = table_reader.read("store_sales", relevant_cols=store_sales_cols) - date_dim = table_reader.read("date_dim", relevant_cols=date_cols) - store = table_reader.read("store", relevant_cols=store_cols) - - ### splitting by row groups 
for better parallelism - pr_table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=True, - ) - - product_reviews_cols = ["pr_review_date", "pr_review_content", "pr_review_sk"] - product_reviews = pr_table_reader.read( - "product_reviews", relevant_cols=product_reviews_cols, - ) - - return store_sales, date_dim, store, product_reviews - - -def create_found_reshaped_with_global_pos(found, targets): - """Given the dataframe created by mapping find_targets_in_reviews, - create a new dataframe in which the nonzero values in each row are exploded - to get their own row. Each row will contain the word, its mapping in the column order, - and the pr_review_sk for the review from which it came. - - Having these as two separate functions makes managing dask metadata easier. - """ - import cudf - - target_df = cudf.DataFrame({"word": targets}).reset_index(drop=False) - target_df.columns = ["word_mapping", "word"] - - df_clean = found.drop(["pr_review_sk"], axis=1) - - row_idxs, col_idxs = df_clean.values.nonzero() - - found_reshaped = cudf.DataFrame( - {"word_mapping": col_idxs, "pr_review_sk": found["pr_review_sk"].iloc[row_idxs]} - ) - found_reshaped = found_reshaped.merge(target_df, on="word_mapping", how="inner")[ - ["word", "pr_review_sk"] - ] - return found_reshaped - - -def find_targets_in_reviews_helper(ddf, targets, str_col_name="pr_review_content"): - """returns a N x K matrix, where N is the number of rows in ddf that - contain one of the target words and K is the number of words in targets. - - If a target word is found in a review, the value in that row, column - is non-zero. - - At the end, any row with non-zero values is returned. 
- - """ - import cudf - from cudf._lib.strings import find_multiple - - lowered = ddf[str_col_name].str.lower() - - ## TODO: Do the replace/any in cupy land before going to cuDF - resdf = cudf.DataFrame( - cp.asarray( - find_multiple.find_multiple(lowered._column, targets._column) - ).reshape(-1, len(targets)) - ) - - resdf = resdf.replace([0, -1], [1, 0]) - found_mask = resdf.any(axis=1) - resdf["pr_review_sk"] = ddf["pr_review_sk"] - found = resdf.loc[found_mask] - return create_found_reshaped_with_global_pos(found, targets) - - -def find_relevant_reviews(df, targets, str_col_name="pr_review_content"): - """ - This function finds the reviews containg target stores and returns the - relevant reviews - """ - import cudf - - targets = cudf.Series(targets) - targets_lower = targets.str.lower() - reviews_found = find_targets_in_reviews_helper(df, targets_lower)[ - ["word", "pr_review_sk"] - ] - - combined = reviews_found.merge( - df[["pr_review_date", "pr_review_sk"]], how="inner", on=["pr_review_sk"] - ) - - return combined - def main(client, config): - import cudf - import dask_cudf store_sales, date_dim, store, product_reviews = benchmark( read_tables, @@ -236,7 +124,6 @@ def main(client, config): .to_arrow() .to_pylist() ) - n_targets = len(targets) no_nulls = pr[~pr.pr_review_content.isnull()].reset_index(drop=True) no_nulls["pr_review_sk"] = no_nulls["pr_review_sk"].astype("int32") @@ -337,8 +224,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q18/gpu_bdb_query_18_dask_sql.py b/gpu_bdb/queries/q18/gpu_bdb_query_18_dask_sql.py new file mode 100755 index 00000000..4a1eba70 --- /dev/null +++ b/gpu_bdb/queries/q18/gpu_bdb_query_18_dask_sql.py @@ -0,0 +1,239 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os + +from bdb_tools.cluster_startup import attach_to_cluster +import numpy as np + +import dask_cudf + +from bdb_tools.text import create_sentences_from_reviews, create_words_from_sentences + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q18_utils import ( + find_relevant_reviews, + q18_startDate, + q18_endDate, + EOL_CHAR, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = f""" + WITH temp_table1 AS + ( + SELECT CAST(s.s_store_sk AS INTEGER) AS s_store_sk, + s.s_store_name , + CAST(s.s_store_sk AS VARCHAR) || '_' || s.s_store_name + AS store_ID + FROM store s, + ( + SELECT temp.ss_store_sk, + ((count(temp.x) * SUM(temp.xy) - SUM(temp.x) * SUM(temp.y)) + / (count(temp.x) * SUM(temp.xx) - SUM(temp.x) * SUM(temp.x)) + ) AS slope + FROM + ( + SELECT + s.ss_store_sk, + s.ss_sold_date_sk AS x, + CAST( SUM(s.ss_net_paid) AS DOUBLE) AS y, + s.ss_sold_date_sk * SUM(s.ss_net_paid) AS xy, + s.ss_sold_date_sk * s.ss_sold_date_sk AS xx + FROM store_sales s + WHERE EXISTS + ( + SELECT * -- d_date_sk + FROM date_dim d + WHERE s.ss_sold_date_sk = d.d_date_sk + AND CAST(d.d_date AS DATE) >= DATE '{q18_startDate}' + AND CAST(d.d_date AS DATE) <= DATE '{q18_endDate}' + ) + GROUP BY s.ss_store_sk, s.ss_sold_date_sk + ) temp + GROUP BY 
temp.ss_store_sk + ) regression_analysis + WHERE slope <= 0 --flat or declining sales + AND s.s_store_sk = regression_analysis.ss_store_sk + ) + SELECT * FROM temp_table1 + """ + stores_with_regression = c.sql(query_1) + + query_2 = """ + SELECT pr_review_date, + pr_review_content, + CAST(pr_review_sk AS INTEGER) AS pr_review_sk + FROM product_reviews + WHERE pr_review_content IS NOT NULL + ORDER BY pr_review_date, pr_review_content, pr_review_sk + """ + no_nulls = c.sql(query_2) + + targets = ( + stores_with_regression.s_store_name.str.lower() + .unique() + .compute() + .to_arrow() + .to_pylist() + ) + + # perssiting because no_nulls is used twice + no_nulls = no_nulls.persist() + + import cudf + + temp_table2_meta_empty_df = cudf.DataFrame( + { + "word": ["a"], + "pr_review_sk": np.ones(1, dtype=np.int64), + "pr_review_date": ["a"], + } + ).head(0) + + # get relevant reviews + combined = no_nulls.map_partitions( + find_relevant_reviews, targets, meta=temp_table2_meta_empty_df, + ) + + no_nulls["pr_review_content"] = no_nulls.pr_review_content.str.replace( + [". ", "? ", "! 
"], [EOL_CHAR], regex=False + ) + + stores_with_regression["store_ID"] = stores_with_regression.s_store_sk.astype( + "str" + ).str.cat(stores_with_regression.s_store_name, sep="_") + + stores_with_regression[ + "s_store_name" + ] = stores_with_regression.s_store_name.str.lower() + + stores_with_regression = stores_with_regression.persist() + wait(stores_with_regression) + c.create_table("stores_with_regression", stores_with_regression, persist=False) + + combined = combined.persist() + wait(combined) + c.create_table("combined", combined, persist=False) + + query_3 = """ + SELECT store_ID, + pr_review_date, + CAST(pr_review_sk AS INTEGER) AS pr_review_sk + FROM stores_with_regression + INNER JOIN combined ON s_store_name = word + """ + temp_table2 = c.sql(query_3) + + c.drop_table("stores_with_regression") + del stores_with_regression + + c.drop_table("combined") + del combined + + # REAL QUERY + sentences = no_nulls.map_partitions(create_sentences_from_reviews) + + # need the global position in the sentence tokenized df + sentences["x"] = 1 + sentences["sentence_tokenized_global_pos"] = sentences.x.cumsum() + del sentences["x"] + + word_df = sentences.map_partitions( + create_words_from_sentences, + global_position_column="sentence_tokenized_global_pos", + ) + + # This txt file comes from the official TPCx-BB kit + # We extracted it from bigbenchqueriesmr.jar + # Need to pass the absolute path for this txt file + sentiment_dir = os.path.join(config["data_dir"], "sentiment_files") + ns_df = dask_cudf.read_csv(os.path.join(sentiment_dir, "negativeSentiment.txt"), names=["sentiment_word"]) + c.create_table('sent_df', ns_df, persist=False) + + word_df = word_df.persist() + wait(word_df) + c.create_table("word_df", word_df, persist=False) + + sentences = sentences.persist() + wait(sentences) + c.create_table("sentences", sentences, persist=False) + + temp_table2 = temp_table2.persist() + wait(temp_table2) + c.create_table("temp_table2", temp_table2, persist=False) + + 
query_4 = """ + WITH sentences_table AS + ( + select sentence, + review_idx_global_pos, + CAST(sentence_tokenized_global_pos AS BIGINT) AS + sentence_tokenized_global_pos + from sentences + ), negativeSentiment AS + ( + SELECT DISTINCT sentiment_word AS word + FROM sent_df + ), word_sentence_sentiment AS + ( + SELECT n.word, + CAST(wd.sentence_idx_global_pos AS BIGINT) AS + sentence_idx_global_pos, + 'NEG' AS sentiment + FROM word_df wd + INNER JOIN negativeSentiment n ON wd.word = n.word + ), word_sentence_sentiment_with_sentence_info AS + ( + SELECT * FROM word_sentence_sentiment + LEFT JOIN sentences_table + ON sentence_idx_global_pos = sentence_tokenized_global_pos + ) + SELECT tt2.store_ID AS s_name, + tt2.pr_review_date AS r_date, + wsswsi.sentence AS r_sentence, + wsswsi.sentiment AS sentiment, + wsswsi.word AS sentiment_word + FROM word_sentence_sentiment_with_sentence_info wsswsi + INNER JOIN temp_table2 tt2 + ON wsswsi.review_idx_global_pos = tt2.pr_review_sk + ORDER BY s_name, r_date, r_sentence, sentiment_word + """ + result = c.sql(query_4) + + c.drop_table("word_df") + del word_df + c.drop_table("sentences") + del sentences + c.drop_table("temp_table2") + del temp_table2 + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q19/gpu_bdb_query_19.py b/gpu_bdb/queries/q19/gpu_bdb_query_19.py index 8d4e29a2..06b51580 100755 --- a/gpu_bdb/queries/q19/gpu_bdb_query_19.py +++ b/gpu_bdb/queries/q19/gpu_bdb_query_19.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,58 +14,26 @@ # limitations under the License. 
# -import sys import os +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) from bdb_tools.text import create_sentences_from_reviews, create_words_from_sentences +from bdb_tools.q19_utils import ( + q19_returns_dates_IN, + eol_char, + read_tables +) - -from bdb_tools.readers import build_reader -from dask.distributed import Client, wait -import distributed - - -# -------- Q19 ----------- -q19_returns_dates = ["2004-03-08", "2004-08-02", "2004-11-15", "2004-12-20"] -eol_char = "è" - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], basepath=config["data_dir"], - ) - date_dim_cols = ["d_week_seq", "d_date_sk", "d_date"] - date_dim_df = table_reader.read("date_dim", relevant_cols=date_dim_cols) - store_returns_cols = ["sr_returned_date_sk", "sr_item_sk", "sr_return_quantity"] - store_returns_df = table_reader.read( - "store_returns", relevant_cols=store_returns_cols - ) - web_returns_cols = ["wr_returned_date_sk", "wr_item_sk", "wr_return_quantity"] - web_returns_df = table_reader.read("web_returns", relevant_cols=web_returns_cols) - - ### splitting by row groups for better parallelism - pr_table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=True, - ) - - product_reviews_cols = ["pr_item_sk", "pr_review_content", "pr_review_sk"] - product_reviews = pr_table_reader.read( - "product_reviews", relevant_cols=product_reviews_cols - ) - - return date_dim_df, store_returns_df, web_returns_df, product_reviews - +from dask.distributed import wait def main(client, config): - import cudf - import dask_cudf date_dim_df, store_returns_df, web_returns_df, product_reviews_df = benchmark( read_tables, @@ -78,7 +46,7 @@ def main(client, config): date_dim_df = date_dim_df.merge( date_dim_df, on=["d_week_seq"], how="outer", suffixes=("", "_r") ) - date_dim_df = 
date_dim_df[date_dim_df.d_date_r.isin(q19_returns_dates)].reset_index( + date_dim_df = date_dim_df[date_dim_df.d_date_r.isin(q19_returns_dates_IN)].reset_index( drop=True ) @@ -207,8 +175,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q19/gpu_bdb_query_19_dask_sql.py b/gpu_bdb/queries/q19/gpu_bdb_query_19_dask_sql.py new file mode 100755 index 00000000..fc2b6183 --- /dev/null +++ b/gpu_bdb/queries/q19/gpu_bdb_query_19_dask_sql.py @@ -0,0 +1,171 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +import dask_cudf + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.text import ( + create_sentences_from_reviews, + create_words_from_sentences +) + +from bdb_tools.q19_utils import ( + q19_returns_dates_IN, + eol_char, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + WITH dateFilter AS + ( + -- within the week ending a given date + SELECT d1.d_date_sk + FROM date_dim d1, date_dim d2 + WHERE d1.d_week_seq = d2.d_week_seq + AND CAST(d2.d_date AS DATE) IN (DATE '{q19_returns_dates_IN[0]}', + DATE '{q19_returns_dates_IN[1]}', + DATE '{q19_returns_dates_IN[2]}', + DATE '{q19_returns_dates_IN[3]}') + ), fsr AS + ( + --store returns in week ending given date + SELECT sr_item_sk, SUM(sr_return_quantity) sr_item_qty + FROM store_returns sr + INNER JOIN dateFilter d + ON sr.sr_returned_date_sk = d.d_date_sk + GROUP BY sr_item_sk --across all store and web channels + HAVING SUM(sr_return_quantity) > 0 + ), fwr AS + ( + --web returns in week ending given date + SELECT wr_item_sk, SUM(wr_return_quantity) wr_item_qty + FROM web_returns wr + INNER JOIN dateFilter d + ON wr.wr_returned_date_sk = d_date_sk + GROUP BY wr_item_sk --across all store and web channels + HAVING SUM(wr_return_quantity) > 0 + ), extract_sentiment AS + ( + SELECT pr.pr_item_sk, pr.pr_review_content, pr.pr_review_sk + FROM product_reviews pr + INNER JOIN fsr + ON pr.pr_item_sk = fsr.sr_item_sk + INNER JOIN fwr + ON fsr.sr_item_sk = fwr.wr_item_sk + WHERE pr.pr_review_content IS NOT NULL ---- add as rapids + AND abs( CAST((sr_item_qty-wr_item_qty) AS DOUBLE) / + ((sr_item_qty + wr_item_qty)/2) ) <= 0.1 + ) + SELECT * FROM extract_sentiment + ORDER BY pr_item_sk, pr_review_content, pr_review_sk + """ + merged_df = c.sql(query) + + # second step -- 
Sentiment Word Extraction + merged_df["pr_review_sk"] = merged_df["pr_review_sk"].astype("int32") + merged_df["pr_review_content"] = merged_df.pr_review_content.str.lower() + merged_df["pr_review_content"] = merged_df.pr_review_content.str.replace( + [".", "?", "!"], [eol_char], regex=False + ) + + sentences = merged_df.map_partitions(create_sentences_from_reviews) + # need the global position in the sentence tokenized df + sentences["x"] = 1 + sentences['sentence_tokenized_global_pos'] = sentences['x'].cumsum() + del sentences["x"] + + word_df = sentences.map_partitions( + create_words_from_sentences, + global_position_column="sentence_tokenized_global_pos", + ) + + # This txt file comes from the official TPCx-BB kit + # We extracted it from bigbenchqueriesmr.jar + # Need to pass the absolute path for this txt file + sentiment_dir = os.path.join(config["data_dir"], "sentiment_files") + ns_df = dask_cudf.read_csv(os.path.join(sentiment_dir, "negativeSentiment.txt"), names=["sentiment_word"]) + c.create_table('sent_df', ns_df, persist=False) + + sentences = sentences.persist() + wait(sentences) + c.create_table('sentences_df', sentences, persist=False) + + word_df = word_df.persist() + wait(word_df) + c.create_table('word_df', word_df, persist=False) + + merged_df = merged_df.persist() + wait(merged_df) + c.create_table('merged_df', merged_df, persist=False) + + query = """ + WITH negativesent AS + ( + SELECT distinct sentiment_word + FROM sent_df + ), word_sentence_sentiment AS + ( + SELECT sd.sentiment_word, + wd.sentence_idx_global_pos + FROM word_df wd + INNER JOIN negativesent sd ON wd.word = sd.sentiment_word + ), temp AS + ( + SELECT s.review_idx_global_pos, + w.sentiment_word, + s.sentence + FROM word_sentence_sentiment w + LEFT JOIN sentences_df s + ON w.sentence_idx_global_pos = s.sentence_tokenized_global_pos + ) + SELECT pr_item_sk AS item_sk, + sentence AS review_sentence, + 'NEG' AS sentiment, + sentiment_word + FROM temp + INNER JOIN merged_df ON 
pr_review_sk = review_idx_global_pos + ORDER BY pr_item_sk, review_sentence, sentiment_word + """ + result = c.sql(query) + + c.drop_table("sentences_df") + del sentences + c.drop_table("word_df") + del word_df + c.drop_table("merged_df") + del merged_df + + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q20/gpu_bdb_query_20.py b/gpu_bdb/queries/q20/gpu_bdb_query_20.py index 8db6d19d..85c30cfb 100755 --- a/gpu_bdb/queries/q20/gpu_bdb_query_20.py +++ b/gpu_bdb/queries/q20/gpu_bdb_query_20.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,81 +14,19 @@ # limitations under the License. # -import sys -import cupy as cp -import rmm import numpy as np - from bdb_tools.utils import ( benchmark, gpubdb_argparser, - train_clustering_model, run_query, ) -from bdb_tools.readers import build_reader -from dask import delayed +from bdb_tools.q20_utils import ( + get_clusters, + read_tables +) from dask.distributed import wait - -# q20 parameters -N_CLUSTERS = 8 -CLUSTER_ITERATIONS = 20 -N_ITER = 5 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - store_sales_cols = [ - "ss_customer_sk", - "ss_ticket_number", - "ss_item_sk", - "ss_net_paid", - ] - store_returns_cols = [ - "sr_item_sk", - "sr_customer_sk", - "sr_ticket_number", - "sr_return_amt", - ] - - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - store_returns_df = table_reader.read( - "store_returns", relevant_cols=store_returns_cols - ) - return 
store_sales_df, store_returns_df - - -def get_clusters(client, ml_input_df, feature_cols): - """ - Takes the dask client, kmeans_input_df and feature columns. - Returns a dictionary matching the output required for q20 - """ - import dask_cudf - - ml_tasks = [ - delayed(train_clustering_model)(df, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER) - for df in ml_input_df[feature_cols].to_delayed() - ] - - results_dict = client.compute(*ml_tasks, sync=True) - - labels = results_dict["cid_labels"] - - labels_final = dask_cudf.from_cudf(labels, npartitions=ml_input_df.npartitions) - ml_input_df["label"] = labels_final.reset_index()[0] - - output = ml_input_df[["user_sk", "label"]] - - results_dict["cid_labels"] = output - return results_dict - - def remove_inf_and_nulls(df, column_names, value=0.0): """ Replace all nulls, inf, -inf with value column_name from df @@ -224,8 +162,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q20/gpu_bdb_query_20_dask_sql.py b/gpu_bdb/queries/q20/gpu_bdb_query_20_dask_sql.py new file mode 100755 index 00000000..4715177d --- /dev/null +++ b/gpu_bdb/queries/q20/gpu_bdb_query_20_dask_sql.py @@ -0,0 +1,99 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster +from dask.distributed import wait + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q20_utils import ( + get_clusters, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + SELECT + ss_customer_sk AS user_sk, + round(CASE WHEN ((returns_count IS NULL) OR (orders_count IS NULL) + OR ((returns_count / orders_count) IS NULL) ) THEN 0.0 + ELSE (returns_count / orders_count) END, 7) AS orderRatio, + round(CASE WHEN ((returns_items IS NULL) OR (orders_items IS NULL) + OR ((returns_items / orders_items) IS NULL) ) THEN 0.0 + ELSE (returns_items / orders_items) END, 7) AS itemsRatio, + round(CASE WHEN ((returns_money IS NULL) OR (orders_money IS NULL) + OR ((returns_money / orders_money) IS NULL) ) THEN 0.0 + ELSE (returns_money / orders_money) END, 7) AS monetaryRatio, + round(CASE WHEN ( returns_count IS NULL) THEN 0.0 + ELSE returns_count END, 0) AS frequency + FROM + ( + SELECT + ss_customer_sk, + -- return order ratio + CAST (COUNT(distinct(ss_ticket_number)) AS DOUBLE) + AS orders_count, + -- return ss_item_sk ratio + CAST (COUNT(ss_item_sk) AS DOUBLE) AS orders_items, + -- return monetary amount ratio + CAST(SUM( ss_net_paid ) AS DOUBLE) AS orders_money + FROM store_sales s + GROUP BY ss_customer_sk + ) orders + LEFT OUTER JOIN + ( + SELECT + sr_customer_sk, + -- return order ratio + CAST(count(distinct(sr_ticket_number)) AS DOUBLE) + AS returns_count, + -- return ss_item_sk ratio + CAST (COUNT(sr_item_sk) AS DOUBLE) AS returns_items, + -- return monetary amount ratio + CAST( SUM( sr_return_amt ) AS DOUBLE) AS returns_money + FROM store_returns + GROUP BY sr_customer_sk + ) returned ON ss_customer_sk=sr_customer_sk + """ + final_df = c.sql(query) + + final_df = final_df.fillna(0) + final_df = final_df.repartition(npartitions=1).persist() + wait(final_df) + + 
final_df = final_df.sort_values(["user_sk"]).reset_index(drop=True) + final_df = final_df.persist() + wait(final_df) + + feature_cols = ["orderRatio", "itemsRatio", "monetaryRatio", "frequency"] + + results_dict = get_clusters( + client=client, ml_input_df=final_df, feature_cols=feature_cols + ) + + return results_dict + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q21/gpu_bdb_query_21.py b/gpu_bdb/queries/q21/gpu_bdb_query_21.py index 4d1e1217..084e0392 100755 --- a/gpu_bdb/queries/q21/gpu_bdb_query_21.py +++ b/gpu_bdb/queries/q21/gpu_bdb_query_21.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,6 @@ # limitations under the License. 
# -import sys - from bdb_tools.utils import ( benchmark, gpubdb_argparser, @@ -23,67 +21,20 @@ ) from bdb_tools.merge_util import hash_merge -from bdb_tools.readers import build_reader -from dask.distributed import Client, wait +from bdb_tools.q21_utils import read_tables + +from dask.distributed import wait q21_year = 2003 q21_month = 1 q21_limit = 100 - -store_sales_cols = [ - "ss_item_sk", - "ss_store_sk", - "ss_customer_sk", - "ss_ticket_number", - "ss_quantity", - "ss_sold_date_sk", -] -date_cols = ["d_date_sk", "d_year", "d_moy"] -websale_cols = ["ws_item_sk", "ws_bill_customer_sk", "ws_quantity", "ws_sold_date_sk"] -sr_cols = [ - "sr_item_sk", - "sr_customer_sk", - "sr_ticket_number", - "sr_return_quantity", - "sr_returned_date_sk", -] -store_cols = ["s_store_name", "s_store_id", "s_store_sk"] -item_cols = ["i_item_id", "i_item_desc", "i_item_sk"] - -# todo: See if persisting the date table improves performence as its used all over - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - store_sales_df = table_reader.read("store_sales", relevant_cols=store_sales_cols) - date_dim_df = table_reader.read("date_dim", relevant_cols=date_cols) - web_sales_df = table_reader.read("web_sales", relevant_cols=websale_cols) - store_retuns_df = table_reader.read("store_returns", relevant_cols=sr_cols) - store_table_df = table_reader.read("store", relevant_cols=store_cols) - item_table_df = table_reader.read("item", relevant_cols=item_cols) - - return ( - store_sales_df, - date_dim_df, - web_sales_df, - store_retuns_df, - store_table_df, - item_table_df, - ) - - def main(client, config): ( store_sales_df, date_dim_df, web_sales_df, - store_retuns_df, + store_returns_df, store_table_df, item_table_df, ) = benchmark( @@ -105,7 +56,7 @@ def main(client, config): meta=date_dim_df._meta, ).reset_index(drop=True) - part_sr = store_retuns_df.merge( + 
part_sr = store_returns_df.merge( d2, left_on="sr_returned_date_sk", right_on="d_date_sk", how="inner" ) @@ -289,8 +240,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q21/gpu_bdb_query_21_dask_sql.py b/gpu_bdb/queries/q21/gpu_bdb_query_21_dask_sql.py new file mode 100755 index 00000000..eca3dd03 --- /dev/null +++ b/gpu_bdb/queries/q21/gpu_bdb_query_21_dask_sql.py @@ -0,0 +1,117 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q21_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + SELECT + part_i.i_item_id AS i_item_id, + part_i.i_item_desc AS i_item_desc, + part_s.s_store_id AS s_store_id, + part_s.s_store_name AS s_store_name, + CAST(SUM(part_ss.ss_quantity) AS BIGINT) AS store_sales_quantity, + CAST(SUM(part_sr.sr_return_quantity) AS BIGINT) AS store_returns_quantity, + CAST(SUM(part_ws.ws_quantity) AS BIGINT) AS web_sales_quantity + FROM + ( + SELECT + sr_item_sk, + sr_customer_sk, + sr_ticket_number, + sr_return_quantity + FROM + store_returns sr, + date_dim d2 + WHERE d2.d_year = 2003 + AND d2.d_moy BETWEEN 1 AND 7 --which were returned in the next six months + AND sr.sr_returned_date_sk = d2.d_date_sk + ) part_sr + INNER JOIN + ( + SELECT + ws_item_sk, + ws_bill_customer_sk, + ws_quantity + FROM + web_sales ws, + date_dim d3 + -- in the following three years (re-purchased by the returning customer afterwards through the web sales channel) + WHERE d3.d_year BETWEEN 2003 AND 2005 + AND ws.ws_sold_date_sk = d3.d_date_sk + ) part_ws ON + ( + part_sr.sr_item_sk = part_ws.ws_item_sk + AND part_sr.sr_customer_sk = part_ws.ws_bill_customer_sk + ) INNER JOIN + ( + SELECT + ss_item_sk, + ss_store_sk, + ss_customer_sk, + ss_ticket_number, + ss_quantity + FROM + store_sales ss, + date_dim d1 + WHERE d1.d_year = 2003 + AND d1.d_moy = 1 + AND ss.ss_sold_date_sk = d1.d_date_sk + ) part_ss ON + ( + part_ss.ss_ticket_number = part_sr.sr_ticket_number + AND part_ss.ss_item_sk = part_sr.sr_item_sk + AND part_ss.ss_customer_sk = part_sr.sr_customer_sk + ) + INNER JOIN store part_s ON + ( + part_s.s_store_sk = part_ss.ss_store_sk + ) + INNER JOIN item part_i ON + ( + part_i.i_item_sk = part_ss.ss_item_sk + ) + GROUP BY + 
part_i.i_item_id, + part_i.i_item_desc, + part_s.s_store_id, + part_s.s_store_name + ORDER BY + part_i.i_item_id, + part_i.i_item_desc, + part_s.s_store_id, + part_s.s_store_name + LIMIT 100 + """ + result = c.sql(query) + result['i_item_desc'] = result['i_item_desc'].str.strip() + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q22/gpu_bdb_query_22.py b/gpu_bdb/queries/q22/gpu_bdb_query_22.py index 2dfebb1f..3a56d3fb 100755 --- a/gpu_bdb/queries/q22/gpu_bdb_query_22.py +++ b/gpu_bdb/queries/q22/gpu_bdb_query_22.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,19 +14,19 @@ # limitations under the License. 
# -from numba import cuda import numpy as np -import sys - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, - convert_datestring_to_days, ) -from bdb_tools.readers import build_reader - +from bdb_tools.q22_utils import ( + q22_date, + q22_i_current_price_min, + q22_i_current_price_max, + read_tables +) def inventory_before_after(df, date): df["inv_before"] = df["inv_quantity_on_hand"].copy() @@ -36,38 +36,7 @@ def inventory_before_after(df, date): return df -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - inv_columns = [ - "inv_item_sk", - "inv_warehouse_sk", - "inv_date_sk", - "inv_quantity_on_hand", - ] - inventory = table_reader.read("inventory", relevant_cols=inv_columns) - - item_columns = ["i_item_id", "i_current_price", "i_item_sk"] - item = table_reader.read("item", relevant_cols=item_columns) - - warehouse_columns = ["w_warehouse_sk", "w_warehouse_name"] - warehouse = table_reader.read("warehouse", relevant_cols=warehouse_columns) - - dd_columns = ["d_date_sk", "d_date"] - date_dim = table_reader.read("date_dim", relevant_cols=dd_columns) - - return inventory, item, warehouse, date_dim - - def main(client, config): - - q22_date = "2001-05-08" - q22_i_current_price_min = 0.98 - q22_i_current_price_max = 1.5 - inventory, item, warehouse, date_dim = benchmark( read_tables, config=config, @@ -99,7 +68,6 @@ def main(client, config): output_table = output_table[keep_columns] - date_dim = date_dim.map_partitions(convert_datestring_to_days) # Filter limit in days min_date = np.datetime64(q22_date, "D").astype(int) - 30 @@ -161,8 +129,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q22/gpu_bdb_query_22_dask_sql.py 
b/gpu_bdb/queries/q22/gpu_bdb_query_22_dask_sql.py new file mode 100755 index 00000000..6393842c --- /dev/null +++ b/gpu_bdb/queries/q22/gpu_bdb_query_22_dask_sql.py @@ -0,0 +1,85 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query +) + +from bdb_tools.q22_utils import ( + q22_date, + q22_i_current_price_min, + q22_i_current_price_max, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + # Filter limit in days + min_date = np.datetime64(q22_date, "D").astype(int) - 30 + max_date = np.datetime64(q22_date, "D").astype(int) + 30 + d_date_int = np.datetime64(q22_date, "D").astype(int) + ratio_min = 2.0 / 3.0 + ratio_max = 3.0 / 2.0 + query = f""" + SELECT + w_warehouse_name, + i_item_id, + SUM(CASE WHEN d_date - {d_date_int} < 0 THEN inv_quantity_on_hand ELSE 0 END) AS inv_before, + SUM(CASE WHEN d_date - {d_date_int} >= 0 THEN inv_quantity_on_hand ELSE 0 END) AS inv_after + FROM + inventory inv, + item i, + warehouse w, + date_dim d + WHERE i_current_price BETWEEN {q22_i_current_price_min} AND {q22_i_current_price_max} + AND i_item_sk = inv_item_sk + AND inv_warehouse_sk = w_warehouse_sk + AND inv_date_sk = d_date_sk + AND d_date >= {min_date} + AND d_date <= {max_date} + GROUP BY 
w_warehouse_name, i_item_id + """ + intermediate = c.sql(query) + c.create_table("intermediate", intermediate ,persist=False) + + query_2 = f""" + SELECT + w_warehouse_name, + i_item_id, + inv_before, + inv_after + FROM intermediate + WHERE inv_before > 0 + AND CAST(inv_after AS DOUBLE) / CAST(inv_before AS DOUBLE) >= {ratio_min} + AND CAST(inv_after AS DOUBLE) / CAST(inv_before AS DOUBLE) <= {ratio_max} + ORDER BY w_warehouse_name, i_item_id + LIMIT 100 + """ + result = c.sql(query_2) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q23/gpu_bdb_query_23.py b/gpu_bdb/queries/q23/gpu_bdb_query_23.py index 08e1b09d..0dcb558a 100755 --- a/gpu_bdb/queries/q23/gpu_bdb_query_23.py +++ b/gpu_bdb/queries/q23/gpu_bdb_query_23.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,46 +14,20 @@ # limitations under the License. 
# -import cupy as cp -import sys -import rmm - - -from bdb_tools.readers import build_reader from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) +from bdb_tools.q23_utils import ( + q23_year, + q23_month, + q23_coefficient, + read_tables +) from distributed import wait - -### inventory date -q23_year = 2001 -q23_month = 1 -q23_coefficient = 1.3 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], basepath=config["data_dir"], - ) - - date_cols = ["d_date_sk", "d_year", "d_moy"] - date_df = table_reader.read("date_dim", relevant_cols=date_cols) - - inv_cols = [ - "inv_warehouse_sk", - "inv_item_sk", - "inv_date_sk", - "inv_quantity_on_hand", - ] - inv_df = table_reader.read("inventory", relevant_cols=inv_cols) - - return date_df, inv_df - - def get_iteration1(merged_inv_dates, n_workers): grouped_df = merged_inv_dates.groupby(["inv_warehouse_sk", "inv_item_sk", "d_moy"]) q23_tmp_inv_part = grouped_df.agg( @@ -129,8 +103,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q23/gpu_bdb_query_23_dask_sql.py b/gpu_bdb/queries/q23/gpu_bdb_query_23_dask_sql.py new file mode 100755 index 00000000..ef3debd8 --- /dev/null +++ b/gpu_bdb/queries/q23/gpu_bdb_query_23_dask_sql.py @@ -0,0 +1,99 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q23_utils import ( + q23_year, + q23_month, + q23_coefficient, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = f""" + SELECT inv_warehouse_sk, + inv_item_sk, + inv_quantity_on_hand, + d_moy + FROM inventory inv + INNER JOIN date_dim d ON inv.inv_date_sk = d.d_date_sk + AND d.d_year = {q23_year} + AND d_moy between {q23_month} AND {q23_month + 1} + """ + inv_dates_result = c.sql(query_1) + + c.create_table('inv_dates', inv_dates_result, persist=False) + query_2 = """ + SELECT inv_warehouse_sk, + inv_item_sk, + d_moy, + AVG(CAST(inv_quantity_on_hand AS DOUBLE)) AS q_mean, + stddev_samp(CAST(inv_quantity_on_hand as DOUBLE)) AS q_std + FROM inv_dates + GROUP BY inv_warehouse_sk, inv_item_sk, d_moy + """ + iteration_1 = c.sql(query_2) + + c.create_table('iteration_1', iteration_1, persist=False) + query_3 = f""" + SELECT inv_warehouse_sk, + inv_item_sk, + d_moy, + q_std / q_mean AS qty_cov + FROM iteration_1 + WHERE (q_std / q_mean) >= {q23_coefficient} + """ + + iteration_2 = c.sql(query_3) + + c.create_table('temp_table', iteration_2, persist=False) + query = f""" + SELECT inv1.inv_warehouse_sk, + inv1.inv_item_sk, + inv1.d_moy, + inv1.qty_cov AS cov, + inv2.d_moy AS inv2_d_moy, + inv2.qty_cov AS inv2_cov + FROM temp_table inv1 + INNER JOIN temp_table inv2 ON inv1.inv_warehouse_sk = inv2.inv_warehouse_sk + AND inv1.inv_item_sk = inv2.inv_item_sk + AND inv1.d_moy = {q23_month} + AND inv2.d_moy = {q23_month + 1} + ORDER BY inv1.inv_warehouse_sk, + inv1.inv_item_sk + """ + result = c.sql(query) + result = result.persist() + wait(result) + c.drop_table("temp_table") + return 
result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q24/gpu_bdb_query_24.py b/gpu_bdb/queries/q24/gpu_bdb_query_24.py index 5b7b2a07..90f2bf3b 100755 --- a/gpu_bdb/queries/q24/gpu_bdb_query_24.py +++ b/gpu_bdb/queries/q24/gpu_bdb_query_24.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,51 +14,20 @@ # limitations under the License. # -import sys - - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader +from bdb_tools.q24_utils import read_tables from distributed import wait ### Current Implimenation Assumption ### Grouped Store sales and web sales of 1 item grouped by `date_sk` should fit in memory as number of dates is limited - ## query parameter q24_i_item_sk = 10000 -ws_cols = ["ws_item_sk", "ws_sold_date_sk", "ws_quantity"] -item_cols = ["i_item_sk", "i_current_price"] -imp_cols = [ - "imp_item_sk", - "imp_competitor_price", - "imp_start_date", - "imp_end_date", - "imp_sk", -] -ss_cols = ["ss_item_sk", "ss_sold_date_sk", "ss_quantity"] - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - ### read tables - ws_df = table_reader.read("web_sales", relevant_cols=ws_cols) - item_df = table_reader.read("item", relevant_cols=item_cols) - imp_df = table_reader.read("item_marketprices", relevant_cols=imp_cols) - ss_df = table_reader.read("store_sales", relevant_cols=ss_cols) - - return ws_df, item_df, imp_df, ss_df - - def get_helper_query_table(imp_df, item_df): f_imp_df = ( 
imp_df.query(f"imp_item_sk == {q24_i_item_sk}", meta=imp_df._meta) @@ -254,8 +223,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q24/gpu_bdb_query_24_dask_sql.py b/gpu_bdb/queries/q24/gpu_bdb_query_24_dask_sql.py new file mode 100755 index 00000000..f418beb2 --- /dev/null +++ b/gpu_bdb/queries/q24/gpu_bdb_query_24_dask_sql.py @@ -0,0 +1,82 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q24_utils import read_tables + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = """ + WITH temp_table as + ( + SELECT + i_item_sk, + imp_sk, + (imp_competitor_price - i_current_price) / i_current_price AS price_change, + imp_start_date, + (imp_end_date - imp_start_date) AS no_days_comp_price + FROM item i ,item_marketprices imp + WHERE i.i_item_sk = imp.imp_item_sk + AND i.i_item_sk = 10000 + ORDER BY i_item_sk, imp_sk, imp_start_date + ) + SELECT ws_item_sk, + -- avg ( (current_ss_quant + current_ws_quant - prev_ss_quant - prev_ws_quant) / ((prev_ss_quant + prev_ws_quant) * ws.price_change) ) -- single node + sum( (current_ss_quant+current_ws_quant-prev_ss_quant-prev_ws_quant) / (prev_ss_quant*ws.price_change+prev_ws_quant*ws.price_change) ) + / count( (current_ss_quant + current_ws_quant - prev_ss_quant - prev_ws_quant) / ((prev_ss_quant + prev_ws_quant) * ws.price_change) ) AS cross_price_elasticity + FROM + ( + SELECT + ws_item_sk, + imp_sk, + price_change, + SUM( CASE WHEN ( (ws_sold_date_sk >= c.imp_start_date) AND (ws_sold_date_sk < (c.imp_start_date + c.no_days_comp_price))) THEN ws_quantity ELSE 0 END ) AS current_ws_quant, + SUM( CASE WHEN ( (ws_sold_date_sk >= (c.imp_start_date - c.no_days_comp_price)) AND (ws_sold_date_sk < c.imp_start_date)) THEN ws_quantity ELSE 0 END ) AS prev_ws_quant + FROM web_sales ws + JOIN temp_table c ON ws.ws_item_sk = c.i_item_sk + GROUP BY ws_item_sk, imp_sk, price_change + ) ws JOIN + ( + SELECT + ss_item_sk, + imp_sk, + price_change, + SUM( CASE WHEN ((ss_sold_date_sk >= c.imp_start_date) AND (ss_sold_date_sk < (c.imp_start_date + c.no_days_comp_price))) THEN ss_quantity ELSE 0 END) AS current_ss_quant, + SUM( CASE WHEN ((ss_sold_date_sk >= (c.imp_start_date - c.no_days_comp_price)) AND 
(ss_sold_date_sk < c.imp_start_date)) THEN ss_quantity ELSE 0 END) AS prev_ss_quant + FROM store_sales ss + JOIN temp_table c ON c.i_item_sk = ss.ss_item_sk + GROUP BY ss_item_sk, imp_sk, price_change + ) ss + ON (ws.ws_item_sk = ss.ss_item_sk and ws.imp_sk = ss.imp_sk) + GROUP BY ws.ws_item_sk + """ + + result = c.sql(query) + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q25/gpu_bdb_query_25.py b/gpu_bdb/queries/q25/gpu_bdb_query_25.py index 5d88b643..2e0fde62 100755 --- a/gpu_bdb/queries/q25/gpu_bdb_query_25.py +++ b/gpu_bdb/queries/q25/gpu_bdb_query_25.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,10 +14,9 @@ # limitations under the License. 
# -import sys - import numpy as np -from numba import cuda + +import dask_cudf from bdb_tools.utils import ( benchmark, @@ -26,40 +25,15 @@ run_query, convert_datestring_to_days, ) -from bdb_tools.readers import build_reader +from bdb_tools.q25_utils import ( + q25_date, + N_CLUSTERS, + CLUSTER_ITERATIONS, + N_ITER, + read_tables +) from dask import delayed - -# q25 parameters -Q25_DATE = "2002-01-02" -N_CLUSTERS = 8 -CLUSTER_ITERATIONS = 20 -N_ITER = 5 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - ss_cols = ["ss_customer_sk", "ss_sold_date_sk", "ss_ticket_number", "ss_net_paid"] - ws_cols = [ - "ws_bill_customer_sk", - "ws_sold_date_sk", - "ws_order_number", - "ws_net_paid", - ] - datedim_cols = ["d_date_sk", "d_date"] - - ss_ddf = table_reader.read("store_sales", relevant_cols=ss_cols, index=False) - ws_ddf = table_reader.read("web_sales", relevant_cols=ws_cols, index=False) - datedim_ddf = table_reader.read("date_dim", relevant_cols=datedim_cols, index=False) - - return (ss_ddf, ws_ddf, datedim_ddf) - - def agg_count_distinct(df, group_key, counted_key, client): """Returns a Series that is the result of counting distinct instances of 'counted_key' within each 'group_key'. The series' index will have one entry per unique 'group_key' value. 
@@ -77,7 +51,6 @@ def agg_count_distinct(df, group_key, counted_key, client): def get_clusters(client, ml_input_df): - import dask_cudf ml_tasks = [ delayed(train_clustering_model)(df, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER) @@ -100,7 +73,6 @@ def get_clusters(client, ml_input_df): def main(client, config): - import dask_cudf ss_ddf, ws_ddf, datedim_ddf = benchmark( read_tables, @@ -109,7 +81,7 @@ def main(client, config): dask_profile=config["dask_profile"], ) datedim_ddf = datedim_ddf.map_partitions(convert_datestring_to_days) - min_date = np.datetime64(Q25_DATE, "D").astype(int) + min_date = np.datetime64(q25_date, "D").astype(int) # Filter by date valid_dates_ddf = datedim_ddf[datedim_ddf["d_date"] > min_date].reset_index( drop=True @@ -173,8 +145,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q25/gpu_bdb_query_25_dask_sql.py b/gpu_bdb/queries/q25/gpu_bdb_query_25_dask_sql.py new file mode 100755 index 00000000..3ae0afad --- /dev/null +++ b/gpu_bdb/queries/q25/gpu_bdb_query_25_dask_sql.py @@ -0,0 +1,185 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, + train_clustering_model +) + +from bdb_tools.q25_utils import ( + q25_date, + N_CLUSTERS, + CLUSTER_ITERATIONS, + N_ITER, + read_tables +) + +from dask import delayed + +def get_clusters(client, ml_input_df): + import dask_cudf + + ml_tasks = [ + delayed(train_clustering_model)(df, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER) + for df in ml_input_df.to_delayed() + ] + results_dict = client.compute(*ml_tasks, sync=True) + + output = ml_input_df.index.to_frame().reset_index(drop=True) + + labels_final = dask_cudf.from_cudf( + results_dict["cid_labels"], npartitions=output.npartitions + ) + output["label"] = labels_final.reset_index()[0] + + # Based on CDH6.1 q25-result formatting + results_dict["cid_labels"] = output + return results_dict + + +def agg_count_distinct(df, group_key, counted_key): + """Returns a Series that is the result of counting distinct instances of 'counted_key' within each 'group_key'. + The series' index will have one entry per unique 'group_key' value. + Workaround for lack of nunique aggregate function on Dask df. 
+ """ + + ### going via repartition for split_out drop duplicates + unique_df = df[[group_key, counted_key]].map_partitions( + lambda df: df.drop_duplicates() + ) + unique_df = unique_df.shuffle(on=[group_key]) + unique_df = unique_df.map_partitions(lambda df: df.drop_duplicates()) + + unique_df = unique_df.groupby(group_key)[counted_key].count() + return unique_df.reset_index(drop=False) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + ss_join_query= f""" + SELECT + ss_customer_sk, + ss_sold_date_sk, + ss_net_paid, + ss_ticket_number + FROM + store_sales ss + JOIN + date_dim d ON ss.ss_sold_date_sk = d.d_date_sk + WHERE + CAST(d.d_date AS DATE) > DATE '{q25_date}' + AND + ss_customer_sk IS NOT NULL + """ + + + ws_join_query = f""" + SELECT + ws_bill_customer_sk, + ws_order_number, + ws_sold_date_sk, + ws_net_paid + FROM + web_sales ws + JOIN + date_dim d ON ws.ws_sold_date_sk = d.d_date_sk + WHERE + CAST(d.d_date AS DATE) > DATE '{q25_date}' + AND + ws_bill_customer_sk IS NOT NULL + """ + + ss_merged_df = c.sql(ss_join_query) + ws_merged_df = c.sql(ws_join_query) + + c.create_table('ss_merged_table', ss_merged_df, persist=False) + c.create_table('ws_merged_table', ws_merged_df, persist=False) + + ss_agg_query = """ + SELECT + ss_customer_sk AS cid, + -- count(distinct ss_ticket_number) AS frequency, # distinct count groupby OOMS with dask-sql + max(ss_sold_date_sk) AS most_recent_date, + CAST( SUM(ss_net_paid) AS DOUBLE) AS amount + FROM ss_merged_table + GROUP BY ss_customer_sk + """ + ws_agg_query= """ + SELECT + ws_bill_customer_sk AS cid, + -- count(distinct ws_order_number) AS frequency, # distinct count groupby OOMS with dask-sql + max(ws_sold_date_sk) AS most_recent_date, + CAST( SUM(ws_net_paid) AS DOUBLE) AS amount + FROM ws_merged_table + GROUP BY ws_bill_customer_sk + """ + + ss_distinct_count_agg = agg_count_distinct(ss_merged_df,'ss_customer_sk','ss_ticket_number') + 
ss_distinct_count_agg = ss_distinct_count_agg.rename(columns={'ss_customer_sk':'cid', + 'ss_ticket_number':'frequency'}) + ss_agg_df = c.sql(ss_agg_query) + ### add distinct count + ss_agg_df = ss_agg_df.merge(ss_distinct_count_agg) + + ws_distinct_count_agg = agg_count_distinct(ws_merged_df,'ws_bill_customer_sk','ws_order_number') + ws_distinct_count_agg = ws_distinct_count_agg.rename(columns={'ws_bill_customer_sk':'cid', + 'ws_order_number':'frequency'}) + ws_agg_df = c.sql(ws_agg_query) + ### add distinct count + ws_agg_df = ws_agg_df.merge(ws_distinct_count_agg) + + c.create_table('ss_agg_df', ss_agg_df, persist=False) + c.create_table('ws_agg_df', ws_agg_df, persist=False) + + + result_query = ''' + WITH concat_table AS + ( + SELECT * FROM ss_agg_df + UNION ALL + SELECT * FROM ws_agg_df + ) + SELECT + cid AS cid, + CASE WHEN 37621 - max(most_recent_date) < 60 THEN 1.0 + ELSE 0.0 END AS recency, -- 37621 == 2003-01-02 + CAST( SUM(frequency) AS BIGINT) AS frequency, --total frequency + CAST( SUM(amount) AS DOUBLE) AS amount --total amount + FROM concat_table + GROUP BY cid + ORDER BY cid + ''' + cluster_input_ddf = c.sql(result_query) + + # Prepare df for KMeans clustering + cluster_input_ddf["recency"] = cluster_input_ddf["recency"].astype("int64") + + cluster_input_ddf = cluster_input_ddf.repartition(npartitions=1) + cluster_input_ddf = cluster_input_ddf.persist() + cluster_input_ddf = cluster_input_ddf.set_index('cid') + results_dict = get_clusters(client=client, ml_input_df=cluster_input_ddf) + + return results_dict + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q26/gpu_bdb_query_26.py b/gpu_bdb/queries/q26/gpu_bdb_query_26.py index 82596f0f..fa4b81b3 100755 --- a/gpu_bdb/queries/q26/gpu_bdb_query_26.py +++ b/gpu_bdb/queries/q26/gpu_bdb_query_26.py @@ -1,5 +1,5 @@ # -# Copyright 
(c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,45 +14,23 @@ # limitations under the License. # -import sys - -import numpy as np -from numba import cuda - from bdb_tools.utils import ( benchmark, gpubdb_argparser, train_clustering_model, run_query, ) -from bdb_tools.readers import build_reader +from bdb_tools.q26_utils import ( + Q26_CATEGORY, + Q26_ITEM_COUNT, + N_CLUSTERS, + CLUSTER_ITERATIONS, + N_ITER, + read_tables +) +import numpy as np from dask import delayed - -# q26 parameters -Q26_CATEGORY = "Books" -Q26_ITEM_COUNT = 5 -N_CLUSTERS = 8 -CLUSTER_ITERATIONS = 20 -N_ITER = 5 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - ss_cols = ["ss_customer_sk", "ss_item_sk"] - items_cols = ["i_item_sk", "i_category", "i_class_id"] - - ss_ddf = table_reader.read("store_sales", relevant_cols=ss_cols, index=False) - items_ddf = table_reader.read("item", relevant_cols=items_cols, index=False) - - return (ss_ddf, items_ddf) - - def agg_count_distinct(df, group_key, counted_key): """Returns a Series that is the result of counting distinct instances of 'counted_key' within each 'group_key'. The series' index will have one entry per unique 'group_key' value. 
@@ -113,10 +91,10 @@ def main(client, config): # One-Hot-Encode i_class_id merged_ddf = merged_ddf.map_partitions( - cudf.DataFrame.one_hot_encoding, - column="i_class_id", + cudf.get_dummies, + columns=["i_class_id"], prefix="id", - cats=[i for i in range(1, 16)], + cats={"i_class_id": np.arange(1, 16, dtype="int32")}, prefix_sep="", dtype="float32", ) @@ -139,8 +117,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q26/gpu_bdb_query_26_dask_sql.py b/gpu_bdb/queries/q26/gpu_bdb_query_26_dask_sql.py new file mode 100755 index 00000000..95458d56 --- /dev/null +++ b/gpu_bdb/queries/q26/gpu_bdb_query_26_dask_sql.py @@ -0,0 +1,102 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, + train_clustering_model +) + +from bdb_tools.q26_utils import ( + Q26_CATEGORY, + Q26_ITEM_COUNT, + N_CLUSTERS, + CLUSTER_ITERATIONS, + N_ITER, + read_tables +) + +from dask import delayed + +def get_clusters(client, kmeans_input_df): + import dask_cudf + + ml_tasks = [ + delayed(train_clustering_model)(df, N_CLUSTERS, CLUSTER_ITERATIONS, N_ITER) + for df in kmeans_input_df.to_delayed() + ] + + results_dict = client.compute(*ml_tasks, sync=True) + + output = kmeans_input_df.index.to_frame().reset_index(drop=True) + + labels_final = dask_cudf.from_cudf( + results_dict["cid_labels"], npartitions=output.npartitions + ) + output["label"] = labels_final.reset_index()[0] + + # Based on CDH6.1 q26-result formatting + results_dict["cid_labels"] = output + return results_dict + + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query = f""" + SELECT + ss.ss_customer_sk AS cid, + CAST( count(CASE WHEN i.i_class_id=1 THEN 1 ELSE NULL END) AS DOUBLE ) AS id1, + CAST( count(CASE WHEN i.i_class_id=2 THEN 1 ELSE NULL END) AS DOUBLE ) AS id2, + CAST( count(CASE WHEN i.i_class_id=3 THEN 1 ELSE NULL END) AS DOUBLE ) AS id3, + CAST( count(CASE WHEN i.i_class_id=4 THEN 1 ELSE NULL END) AS DOUBLE ) AS id4, + CAST( count(CASE WHEN i.i_class_id=5 THEN 1 ELSE NULL END) AS DOUBLE ) AS id5, + CAST( count(CASE WHEN i.i_class_id=6 THEN 1 ELSE NULL END) AS DOUBLE ) AS id6, + CAST( count(CASE WHEN i.i_class_id=7 THEN 1 ELSE NULL END) AS DOUBLE ) AS id7, + CAST( count(CASE WHEN i.i_class_id=8 THEN 1 ELSE NULL END) AS DOUBLE ) AS id8, + CAST( count(CASE WHEN i.i_class_id=9 THEN 1 ELSE NULL END) AS DOUBLE ) AS id9, + CAST( count(CASE WHEN i.i_class_id=10 THEN 1 ELSE NULL END) AS DOUBLE ) AS id10, + CAST( count(CASE WHEN i.i_class_id=11 THEN 1 ELSE NULL END) AS DOUBLE ) AS id11, + CAST( 
count(CASE WHEN i.i_class_id=12 THEN 1 ELSE NULL END) AS DOUBLE ) AS id12, + CAST( count(CASE WHEN i.i_class_id=13 THEN 1 ELSE NULL END) AS DOUBLE ) AS id13, + CAST( count(CASE WHEN i.i_class_id=14 THEN 1 ELSE NULL END) AS DOUBLE ) AS id14, + CAST( count(CASE WHEN i.i_class_id=15 THEN 1 ELSE NULL END) AS DOUBLE ) AS id15 + FROM store_sales ss + INNER JOIN item i + ON + ( + ss.ss_item_sk = i.i_item_sk + AND i.i_category IN ('{Q26_CATEGORY}') + AND ss.ss_customer_sk IS NOT NULL + ) + GROUP BY ss.ss_customer_sk + HAVING count(ss.ss_item_sk) > {Q26_ITEM_COUNT} + ORDER BY cid + """ + result = c.sql(query) + result = result.repartition(npartitions=1) + result_ml = result.set_index('cid') + ml_result_dict = get_clusters(client=client, kmeans_input_df=result_ml) + return ml_result_dict + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q27/gpu_bdb_query_27.py b/gpu_bdb/queries/q27/gpu_bdb_query_27.py index 25eb247e..0634d2a8 100755 --- a/gpu_bdb/queries/q27/gpu_bdb_query_27.py +++ b/gpu_bdb/queries/q27/gpu_bdb_query_27.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,63 +14,29 @@ # limitations under the License. 
# -import sys -import time -import argparse - -import spacy -import rmm -import cupy as cp -import distributed +import dask_cudf from bdb_tools.utils import ( benchmark, gpubdb_argparser, - left_semi_join, - run_query, + run_query ) -from bdb_tools.text import create_sentences_from_reviews, create_words_from_sentences -from bdb_tools.readers import build_reader -from dask_cuda import LocalCUDACluster -from dask.distributed import Client, wait - - -# -------- Q27 ----------- -q27_pr_item_sk = 10002 -EOL_CHAR = "." - - -def read_tables(config): - ### splitting by row groups for better parallelism - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=True, - ) - product_reviews_cols = ["pr_item_sk", "pr_review_content", "pr_review_sk"] - product_reviews_df = table_reader.read( - "product_reviews", relevant_cols=product_reviews_cols - ) - return product_reviews_df - +from bdb_tools.text import ( + create_sentences_from_reviews, + create_words_from_sentences +) -def ner_parser(df, col_string, batch_size=256): - spacy.require_gpu() - nlp = spacy.load("en_core_web_sm") - docs = nlp.pipe(df[col_string], disable=["tagger", "parser"], batch_size=batch_size) - out = [] - for doc in docs: - l = [ent.text for ent in doc.ents if ent.label_ == "ORG"] - val = ", " - l = val.join(l) - out.append(l) - df["company_name_list"] = out - return df +from bdb_tools.q27_utils import ( + ner_parser, + q27_pr_item_sk, + EOL_CHAR, + read_tables +) +from dask.distributed import wait def main(client, config): - import dask_cudf product_reviews_df = benchmark( read_tables, @@ -138,8 +104,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q27/gpu_bdb_query_27_dask_sql.py b/gpu_bdb/queries/q27/gpu_bdb_query_27_dask_sql.py new file mode 100755 
index 00000000..50e27a5a --- /dev/null +++ b/gpu_bdb/queries/q27/gpu_bdb_query_27_dask_sql.py @@ -0,0 +1,111 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.text import ( + create_sentences_from_reviews, + create_words_from_sentences +) + +from bdb_tools.cluster_startup import attach_to_cluster +from dask.distributed import wait + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q27_utils import ( + ner_parser, + q27_pr_item_sk, + EOL_CHAR, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + import dask_cudf + + query = f""" + SELECT pr_review_sk, pr_item_sk, pr_review_content + FROM product_reviews + WHERE pr_item_sk = {q27_pr_item_sk} + """ + product_reviews_df = c.sql(query) + + sentences = product_reviews_df.map_partitions( + create_sentences_from_reviews, + review_column="pr_review_content", + end_of_line_char=EOL_CHAR, + ) + + # need the global position in the sentence tokenized df + sentences["x"] = 1 + sentences["sentence_tokenized_global_pos"] = sentences.x.cumsum() + del sentences["x"] + del product_reviews_df + + # Do the NER + sentences = sentences.to_dask_dataframe() + ner_parsed = sentences.map_partitions(ner_parser, "sentence") + ner_parsed = dask_cudf.from_dask_dataframe(ner_parsed) + ner_parsed = ner_parsed.persist() + wait(ner_parsed) + + 
ner_parsed = ner_parsed[ner_parsed.company_name_list != ""] + + # separate NER results into one row per found company + repeated_names = ner_parsed.map_partitions( + create_words_from_sentences, + sentence_column="company_name_list", + global_position_column="sentence_tokenized_global_pos", + delimiter="é", + ) + del sentences + + # recombine + repeated_names = repeated_names.persist() + wait(repeated_names) + c.create_table('repeated_names', repeated_names, persist=False) + + ner_parsed = ner_parsed.persist() + wait(ner_parsed) + c.create_table('ner_parsed', ner_parsed, persist=False) + + query = f""" + SELECT review_idx_global_pos as review_sk, + CAST({q27_pr_item_sk} AS BIGINT) as item_sk, + word as company_name, + sentence as review_sentence + FROM repeated_names left join ner_parsed + ON sentence_idx_global_pos = sentence_tokenized_global_pos + ORDER BY review_idx_global_pos, item_sk, word, sentence + """ + recombined = c.sql(query) + + c.drop_table("repeated_names") + c.drop_table("ner_parsed") + del ner_parsed + del repeated_names + return recombined + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) + diff --git a/gpu_bdb/queries/q28/gpu_bdb_query_28.py b/gpu_bdb/queries/q28/gpu_bdb_query_28.py index 8ecdf712..281c84ac 100755 --- a/gpu_bdb/queries/q28/gpu_bdb_query_28.py +++ b/gpu_bdb/queries/q28/gpu_bdb_query_28.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,311 +15,22 @@ # import cupy -import dask -import distributed -import numpy as np -import time import cupy as cp import copyreg -import sys, os -import traceback - -from distributed import wait -from cuml.feature_extraction.text import HashingVectorizer from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader - - -QUERY_NUM = os.getcwd().split("/")[-1][1:] - -N_FEATURES = 2 ** 23 # Spark is doing 2^20 -ngram_range = (1, 2) -preprocessor = lambda s:s.str.lower() -norm = None -alternate_sign = False - - -def gpu_hashing_vectorizer(x): - vec = HashingVectorizer(n_features=N_FEATURES, - alternate_sign=alternate_sign, - ngram_range=ngram_range, - norm=norm, - preprocessor=preprocessor - ) - return vec.fit_transform(x) - - -def map_labels(ser): - import cudf - output_ser = cudf.Series(cudf.core.column.full(size=len(ser), fill_value=2, dtype=np.int32)) - zero_flag = (ser==1) | (ser==2) - output_ser.loc[zero_flag]=0 - - three_flag = (ser==3) - output_ser.loc[three_flag]=1 - - return output_ser - - -def build_features(t): - X = t["pr_review_content"] - X = X.map_partitions( - gpu_hashing_vectorizer, - meta=dask.array.from_array( - cupy.sparse.csr_matrix(cupy.zeros(1, dtype=cp.float32)) - ), - ) - - X = X.astype(np.float32).persist() - X.compute_chunk_sizes() - - return X - - -def build_labels(reviews_df): - y = reviews_df["pr_review_rating"].map_partitions(map_labels) - y = y.map_partitions(lambda x: cupy.asarray(x, cupy.int32)).persist() - y.compute_chunk_sizes() - - return y - - -def read_tables(config): - ### splitting by row groups for better parallelism - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=True, - ) - - columns = [ - "pr_review_content", - "pr_review_rating", - "pr_review_sk", - ] - ret = table_reader.read("product_reviews", relevant_cols=columns) - return ret - - -def categoricalize(num_sr): - return 
num_sr.astype("str").str.replace(["0", "1", "2"], ["NEG", "NEUT", "POS"]) - - -def sum_tp_fp(y_y_pred, nclasses): - - y, y_pred = y_y_pred - res = cp.zeros((nclasses, 2), order="F") - - for i in range(nclasses): - pos_pred_ix = cp.where(y_pred == i)[0] - - # short circuit - if len(pos_pred_ix) == 0: - res[i] = 0 - break - - tp_sum = (y_pred[pos_pred_ix] == y[pos_pred_ix]).sum() - fp_sum = (y_pred[pos_pred_ix] != y[pos_pred_ix]).sum() - res[i][0] = tp_sum - res[i][1] = fp_sum - return res - - -def precision_score(client, y, y_pred, average="binary"): - from cuml.dask.common.input_utils import DistributedDataHandler - - nclasses = len(cp.unique(y.map_blocks(lambda x: cp.unique(x)).compute())) - - if average == "binary" and nclasses > 2: - raise ValueError - - if nclasses < 2: - raise ValueError("Single class precision is not yet supported") - - ddh = DistributedDataHandler.create([y, y_pred]) - - precision_scores = client.compute( - [ - client.submit(sum_tp_fp, part, nclasses, workers=[worker]) - for worker, part in ddh.gpu_futures - ], - sync=True, - ) - - res = cp.zeros((nclasses, 2), order="F") - - for i in precision_scores: - res += i - - if average == "binary" or average == "macro": - - prec = cp.zeros(nclasses) - for i in range(nclasses): - tp_sum, fp_sum = res[i] - prec[i] = (tp_sum / (tp_sum + fp_sum)).item() - - if average == "binary": - return prec[nclasses - 1].item() - else: - return prec.mean().item() - else: - global_tp = cp.sum(res[:, 0]) - global_fp = cp.sum(res[:, 1]) - - return global_tp / (global_tp + global_fp).item() - - -def local_cm(y_y_pred, unique_labels, sample_weight): - - y_true, y_pred = y_y_pred - labels = unique_labels - - n_labels = labels.size - - # Assume labels are monotonically increasing for now. 
- - # intersect y_pred, y_true with labels, eliminate items not in labels - ind = cp.logical_and(y_pred < n_labels, y_true < n_labels) - y_pred = y_pred[ind] - y_true = y_true[ind] - - if sample_weight is None: - sample_weight = cp.ones(y_true.shape[0], dtype=np.int64) - else: - sample_weight = cp.asarray(sample_weight) - - sample_weight = sample_weight[ind] - - cm = cp.sparse.coo_matrix( - (sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels), dtype=cp.float32, - ).toarray() - - return cp.nan_to_num(cm) - - -def confusion_matrix(client, y_true, y_pred, normalize=None, sample_weight=None): - from cuml.dask.common.input_utils import DistributedDataHandler - - unique_classes = cp.unique(y_true.map_blocks(lambda x: cp.unique(x)).compute()) - nclasses = len(unique_classes) - - ddh = DistributedDataHandler.create([y_true, y_pred]) - - cms = client.compute( - [ - client.submit( - local_cm, part, unique_classes, sample_weight, workers=[worker] - ) - for worker, part in ddh.gpu_futures - ], - sync=True, - ) - - cm = cp.zeros((nclasses, nclasses)) - for i in cms: - cm += i - - with np.errstate(all="ignore"): - if normalize == "true": - cm = cm / cm.sum(axis=1, keepdims=True) - elif normalize == "pred": - cm = cm / cm.sum(axis=0, keepdims=True) - elif normalize == "all": - cm = cm / cm.sum() - cm = cp.nan_to_num(cm) - - return cm - - -def accuracy_score(client, y, y_hat): - from uuid import uuid1 - from cuml.dask.common.input_utils import DistributedDataHandler - - ddh = DistributedDataHandler.create([y_hat, y]) - - def _count_accurate_predictions(y_hat_y): - y_hat, y = y_hat_y - y_hat = cp.asarray(y_hat, dtype=y_hat.dtype) - y = cp.asarray(y, dtype=y.dtype) - return y.shape[0] - cp.count_nonzero(y - y_hat) - - key = uuid1() - - futures = client.compute( - [ - client.submit( - _count_accurate_predictions, - worker_future[1], - workers=[worker_future[0]], - key="%s-%s" % (key, idx), - ) - for idx, worker_future in enumerate(ddh.gpu_futures) - ], - sync=True, - ) - - 
return sum(futures) / y.shape[0] - - -def post_etl_processing(client, train_data, test_data): - import cudf - from cuml.dask.naive_bayes import MultinomialNB as DistMNB - from cuml.dask.common import to_dask_cudf - from cuml.dask.common.input_utils import DistributedDataHandler - - # Feature engineering - X_train = build_features(train_data) - X_test = build_features(test_data) - - y_train = build_labels(train_data) - y_test = build_labels(test_data) - - # Perform ML - model = DistMNB(client=client, alpha=0.001) - model.fit(X_train, y_train) - - ### this regression seems to be coming from here - test_pred_st = time.time() - y_hat = model.predict(X_test).persist() - - # Compute distributed performance metrics - acc = accuracy_score(client, y_test, y_hat) - - print("Accuracy: " + str(acc)) - prec = precision_score(client, y_test, y_hat, average="macro") - - print("Precision: " + str(prec)) - cmat = confusion_matrix(client, y_test, y_hat) - - print("Confusion Matrix: " + str(cmat)) - metric_et = time.time() - - # Place results back in original Dataframe - - ddh = DistributedDataHandler.create(y_hat) - test_preds = to_dask_cudf( - [client.submit(cudf.Series, part) for w, part in ddh.gpu_futures] - ) - - test_preds = test_preds.map_partitions(categoricalize) - - test_data["prediction"] = test_preds - - final_data = test_data[["pr_review_sk", "pr_review_rating", "prediction"]].persist() - - final_data = final_data.sort_values("pr_review_sk").reset_index(drop=True) - wait(final_data) - return final_data, acc, prec, cmat +from bdb_tools.q28_utils import ( + post_etl_processing, + read_tables +) def main(client, config): - q_st = time.time() product_reviews_df = benchmark( read_tables, config=config, @@ -360,11 +71,6 @@ def serialize_mat_descriptor(m): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - from cuml.dask.naive_bayes import MultinomialNB as DistMNB - from cuml.dask.common.input_utils import DistributedDataHandler - 
from cuml.dask.common import to_dask_cudf - config = gpubdb_argparser() client, bc = attach_to_cluster(config) run_query(config=config, client=client, query_func=main) diff --git a/gpu_bdb/queries/q28/gpu_bdb_query_28_dask_sql.py b/gpu_bdb/queries/q28/gpu_bdb_query_28_dask_sql.py new file mode 100755 index 00000000..aa6c5e76 --- /dev/null +++ b/gpu_bdb/queries/q28/gpu_bdb_query_28_dask_sql.py @@ -0,0 +1,77 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q28_utils import ( + post_etl_processing, + read_tables +) + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + # 10 % of data + query1 = """ + SELECT + pr_review_sk, + pr_review_rating, + pr_review_content + FROM product_reviews + WHERE mod(pr_review_sk, 10) IN (0) + AND pr_review_content IS NOT NULL + ORDER BY pr_review_sk + """ + test_data = c.sql(query1) + + # 90 % of data + query2 = """ + SELECT + pr_review_sk, + pr_review_rating, + pr_review_content + FROM product_reviews + WHERE mod(pr_review_sk, 10) IN (1,2,3,4,5,6,7,8,9) + AND pr_review_content IS NOT NULL + ORDER BY pr_review_sk + """ + train_data = c.sql(query2) + + final_data, acc, prec, cmat = post_etl_processing( + client=client, train_data=train_data, test_data=test_data + ) + + payload = { + "df": final_data, + "acc": acc, + "prec": prec, + "cmat": cmat, + "output_type": "supervised", + } + + return payload + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q29/gpu_bdb_query_29.py b/gpu_bdb/queries/q29/gpu_bdb_query_29.py index 88ead76f..7dc8c29c 100755 --- a/gpu_bdb/queries/q29/gpu_bdb_query_29.py +++ b/gpu_bdb/queries/q29/gpu_bdb_query_29.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,16 +14,15 @@ # limitations under the License. 
# -import sys - from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader -from bdb_tools.utils import benchmark -from distributed import wait +from bdb_tools.q29_utils import ( + q29_limit, + read_tables +) ### Implementation Notes: # * `drop_duplicates` and `groupby` by default brings result to single partition @@ -39,25 +38,8 @@ ### Scalabilty problems # * The ws_item_join table after distincts has `48M` rows, can cause problems on bigger scale factors - -# -------- Q29 ----------- -q29_limit = 100 q29_session_timeout_inSec = 3600 - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], basepath=config["data_dir"], - ) - item_cols = ["i_item_sk", "i_category_id"] - item_df = table_reader.read("item", relevant_cols=item_cols) - - ws_cols = ["ws_order_number", "ws_item_sk"] - ws_df = table_reader.read("web_sales", relevant_cols=ws_cols) - - return item_df, ws_df - - ### # Select t1.i_category_id AS category_id_1 , t2.i_category_id AS category_id_2 # FROM ( @@ -149,8 +131,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q29/gpu_bdb_query_29_dask_sql.py b/gpu_bdb/queries/q29/gpu_bdb_query_29_dask_sql.py new file mode 100755 index 00000000..cb34a5e9 --- /dev/null +++ b/gpu_bdb/queries/q29/gpu_bdb_query_29_dask_sql.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.q29_utils import ( + q29_limit, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + n_workers = len(client.scheduler_info()["workers"]) + + join_query = """ + -- Removed distinct as we do it in + -- dask_cudf based drop_duplicates with split_out + -- 553 M rows dont fit on single GPU (int32,int64 column) + -- TODO: Remove when we support Split Out + -- https://github.com/dask-contrib/dask-sql/issues/241 + + SELECT i_category_id, ws_order_number + FROM web_sales ws, item i + WHERE ws.ws_item_sk = i.i_item_sk + AND i.i_category_id IS NOT NULL + """ + result = c.sql(join_query) + + # Distinct Calculation + result_distinct = result.drop_duplicates(split_out=n_workers,ignore_index=True) + ## Remove the int64 index that was created + ## TODO Raise an issue for this + result_distinct = result_distinct.reset_index(drop=True) + ### Persisting because ORDER BY causes execution + c.create_table('distinct_table', result_distinct, persist=True) + + query = f""" + SELECT category_id_1, category_id_2, COUNT (*) AS cnt + FROM + ( + SELECT CAST(t1.i_category_id as BIGINT) AS category_id_1, + CAST(t2.i_category_id as BIGINT) AS category_id_2 + FROM distinct_table t1 + INNER JOIN distinct_table t2 + ON t1.ws_order_number = t2.ws_order_number + WHERE t1.i_category_id < t2.i_category_id + ) + GROUP BY 
category_id_1, category_id_2 + ORDER BY cnt DESC, category_id_1, category_id_2 + LIMIT {q29_limit} + """ + result = c.sql(query) + result = result.persist() + wait(result); + + c.drop_table("distinct_table") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, query_func=main, sql_context=c) diff --git a/gpu_bdb/queries/q30/gpu_bdb_query_30.py b/gpu_bdb/queries/q30/gpu_bdb_query_30.py index 5f9eaac5..1f37f718 100755 --- a/gpu_bdb/queries/q30/gpu_bdb_query_30.py +++ b/gpu_bdb/queries/q30/gpu_bdb_query_30.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,17 +14,23 @@ # limitations under the License. # -import sys import glob import os +import cudf +import dask_cudf + from bdb_tools.utils import ( benchmark, gpubdb_argparser, run_query, ) -from bdb_tools.readers import build_reader -from bdb_tools.sessionization import get_session_id, get_distinct_sessions, get_pairs +from bdb_tools.q30_utils import ( + q30_session_timeout_inSec, + q30_limit, + read_tables +) +from bdb_tools.sessionization import get_distinct_sessions, get_pairs from dask import delayed import numpy as np @@ -35,30 +41,10 @@ # The bottleneck of current implementation is `set-index`, once ucx is working correctly # it should go away - -### session timeout in secs -q30_session_timeout_inSec = 3600 -### query output limit -q30_limit = 40 - - -def read_tables(config): - table_reader = build_reader( - data_format=config["file_format"], - basepath=config["data_dir"], - split_row_groups=config["split_row_groups"], - ) - - item_cols = ["i_category_id", "i_item_sk"] - item_df = table_reader.read("item", relevant_cols=item_cols) - return item_df - - def 
pre_repartition_task(wcs_fn, f_item_df): """ Runs the pre-repartition task """ - import cudf wcs_cols = ["wcs_user_sk", "wcs_item_sk", "wcs_click_date_sk", "wcs_click_time_sk"] wcs_df = cudf.read_parquet(wcs_fn, columns=wcs_cols) @@ -80,8 +66,6 @@ def pre_repartition_task(wcs_fn, f_item_df): def main(client, config): - import dask_cudf - import cudf item_df = benchmark( read_tables, @@ -163,8 +147,6 @@ def main(client, config): if __name__ == "__main__": from bdb_tools.cluster_startup import attach_to_cluster - import cudf - import dask_cudf config = gpubdb_argparser() client, bc = attach_to_cluster(config) diff --git a/gpu_bdb/queries/q30/gpu_bdb_query_30_dask_sql.py b/gpu_bdb/queries/q30/gpu_bdb_query_30_dask_sql.py new file mode 100755 index 00000000..d7ca3868 --- /dev/null +++ b/gpu_bdb/queries/q30/gpu_bdb_query_30_dask_sql.py @@ -0,0 +1,99 @@ +# +# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from bdb_tools.cluster_startup import attach_to_cluster + +from bdb_tools.utils import ( + benchmark, + gpubdb_argparser, + run_query, +) + +from bdb_tools.sessionization import ( + get_distinct_sessions, + get_pairs +) + +from bdb_tools.q30_utils import ( + q30_session_timeout_inSec, + q30_limit, + read_tables +) + +from dask.distributed import wait + +def main(data_dir, client, c, config): + benchmark(read_tables, config, c, dask_profile=config["dask_profile"]) + + query_1 = """ + SELECT i_item_sk, + CAST(i_category_id AS TINYINT) AS i_category_id + FROM item + """ + item_df = c.sql(query_1) + + item_df = item_df.persist() + wait(item_df) + c.create_table("item_df", item_df, persist=False) + + query_2 = """ + SELECT wcs_user_sk, + (wcs_click_date_sk * 86400 + wcs_click_time_sk) AS tstamp_inSec, + i_category_id + FROM web_clickstreams wcs, item_df i + WHERE wcs.wcs_item_sk = i.i_item_sk + AND i.i_category_id IS NOT NULL + AND wcs.wcs_user_sk IS NOT NULL + DISTRIBUTE BY wcs_user_sk + """ + merged_df = c.sql(query_2) + + c.drop_table("item_df") + del item_df + + distinct_session_df = merged_df.map_partitions(get_distinct_sessions, + keep_cols=["wcs_user_sk", "i_category_id"], + time_out=q30_session_timeout_inSec) + + del merged_df + pair_df = distinct_session_df.map_partitions( + get_pairs, + pair_col="i_category_id", + output_col_1="category_id_1", + output_col_2="category_id_2") + del distinct_session_df + + c.create_table('pair_df', pair_df, persist=False) + + last_query = f""" + SELECT CAST(category_id_1 AS BIGINT) AS category_id_1, + CAST(category_id_2 AS BIGINT) AS category_id_2, + COUNT(category_id_2) AS cnt + FROM pair_df + GROUP BY category_id_1, category_id_2 + ORDER BY cnt desc + LIMIT {q30_limit} + """ + result = c.sql(last_query) + + c.drop_table("pair_df") + return result + + +if __name__ == "__main__": + config = gpubdb_argparser() + client, c = attach_to_cluster(config, create_sql_context=True) + run_query(config=config, client=client, 
query_func=main, sql_context=c)