From fceea831cf4df7d7a9ab6f9171a8475ee8d5542d Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:29:18 +0000 Subject: [PATCH 1/9] Rewrite ClickHouse scripts in reusable way. --- clickhouse/ch.sh | 28 ++++ clickhouse/exec.sh | 27 ++-- clickhouse/groupby-clickhouse.sh | 75 ++++++++++ clickhouse/groupby-clickhouse.sql.in | 198 --------------------------- clickhouse/join-clickhouse.sh | 39 ++++++ clickhouse/join-clickhouse.sql.in | 107 --------------- clickhouse/setup-clickhouse.sh | 19 +-- 7 files changed, 155 insertions(+), 338 deletions(-) create mode 100755 clickhouse/groupby-clickhouse.sh delete mode 100644 clickhouse/groupby-clickhouse.sql.in create mode 100755 clickhouse/join-clickhouse.sh delete mode 100644 clickhouse/join-clickhouse.sql.in diff --git a/clickhouse/ch.sh b/clickhouse/ch.sh index 03723264..8083067f 100644 --- a/clickhouse/ch.sh +++ b/clickhouse/ch.sh @@ -14,3 +14,31 @@ ch_active() { local ret=$?; if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; } + +ch_query() { + clickhouse-client --query "DROP TABLE IF EXISTS ans;" + clickhouse-client --log_comment "${RUNNAME}" --query "CREATE TABLE ans ENGINE = Memory AS ${QUERY};" + local ret=$?; + if [[ $ret -eq 210 ]]; then return 1; elif [[ $ret -ne 0 ]]; then echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; # return only on failure; success falls through to the preview and cleanup below + clickhouse-client --query "SELECT * FROM ans LIMIT 3;" + clickhouse-client --query "DROP TABLE ans;" +} + +ch_logrun() { + clickhouse-client --query "SYSTEM FLUSH LOGS;" + clickhouse-client --query "SELECT ${RUN} AS run, toUnixTimestamp(now()) AS timestamp, '${TASK}' AS task, '${SRC_DATANAME}' AS data_name, NULL AS in_rows, '${QUESTION}' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select ${TASK/groupby/group by}' AS fun, 
query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk FROM system.query_log WHERE type='QueryFinish' AND log_comment='${RUNNAME}' ORDER BY query_start_time DESC LIMIT 1 FORMAT CSVWithNames;" > clickhouse/log/${RUNNAME}.csv # fun is derived from TASK: 'select group by' for groupby, 'select join' for join + local ret=$?; + if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; +} + +ch_make_2_runs() { + RUN=1 + RUNNAME="${TASK}_${SRC_DATANAME}_q${Q}_r${RUN}" + ch_query + ch_logrun + + RUN=2 + RUNNAME="${TASK}_${SRC_DATANAME}_q${Q}_r${RUN}" + ch_query + ch_logrun +} \ No newline at end of file diff --git a/clickhouse/exec.sh b/clickhouse/exec.sh index 1b697c12..95e50d10 100755 --- a/clickhouse/exec.sh +++ b/clickhouse/exec.sh @@ -1,5 +1,6 @@ #!/bin/bash set -e +set -x # use this function to check error logs # sudo -u clickhouse clickhouse-server --config=/etc/clickhouse-server/config.xml @@ -24,38 +25,35 @@ ch_active || exit 1 # tail -n+2 data/G1_1e7_1e2_0_0.csv | clickhouse-client --query="INSERT INTO G1_1e7_1e2_0_0 SELECT * FROM input('id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)') FORMAT CSV" # tune CH settings and load data -CH_MEM=245000000000 # 100GB ## old value 128849018880 # 120GB ## now set to 96GB after cache=1 to in-memory temp tables because there was not enough mem for R to parse timings clickhouse-client --query 'DROP TABLE IF EXISTS ans' echo '# clickhouse/exec.sh: creating tables and loading data' if [ $1 == 'groupby' ]; then - CH_EXT_GRP_BY=122500000000 # twice less than CH_MEM #96 - CH_EXT_SORT=122500000000 clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), 
id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)') FORMAT CSV" - # confirm all data loaded yandex/ClickHouse#4463 + clickhouse-client --query "INSERT INTO $SRC_DATANAME FROM INFILE 'data/${SRC_DATANAME}.csv'" + # confirm all data loaded echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' elif [ $1 == 'join' ]; then # lhs clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)') FORMAT CSV" echo -e "clickhouse-client --query 'SELECT count(*) FROM 
$SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS=$(join_to_tbls $SRC_DATANAME) RHS1=$(echo $RHS | cut -d' ' -f1) clickhouse-client --query "DROP TABLE IF EXISTS $RHS1" clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$RHS1.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS1 SELECT * FROM input('id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$RHS1.csv | clickhouse-client --query "INSERT INTO $RHS1 SELECT * FROM input('id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS1'\n$(echo $RHS1 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS2=$(echo $RHS | cut -d' ' -f2) clickhouse-client --query "DROP TABLE IF EXISTS $RHS2" clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$RHS2.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS2 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$RHS2.csv | clickhouse-client --query "INSERT INTO $RHS2 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" echo -e 
"clickhouse-client --query 'SELECT count(*) FROM $RHS2'\n$(echo $RHS2 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS3=$(echo $RHS | cut -d' ' -f3) clickhouse-client --query "DROP TABLE IF EXISTS $RHS3" clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$RHS3.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS3 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$RHS3.csv | clickhouse-client --query "INSERT INTO $RHS3 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS3'\n$(echo $RHS3 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' else echo "clickhouse task $1 not implemented" >&2 && exit 1 @@ -67,14 +65,9 @@ rm -f clickhouse/log/$1_${SRC_DATANAME}_q*.csv # execute sql script on clickhouse clickhouse-client --query 'TRUNCATE TABLE system.query_log' -echo "# clickhouse/exec.sh: data loaded, logs truncated, preparing $1-$SRC_DATANAME benchmark sql script and sending it clickhouse" -if [ $1 == 'groupby' ]; then - # for each data_name produce sql script - sed "s/DATA_NAME/$SRC_DATANAME/g" < "clickhouse/$1-clickhouse.sql.in" > 
"clickhouse/$1-clickhouse.sql" - cat "clickhouse/$1-clickhouse.sql" | clickhouse-client -mn --max_memory_usage $CH_MEM --max_bytes_before_external_group_by $CH_EXT_GRP_BY --max_bytes_before_external_sort $CH_EXT_SORT --receive_timeout 10800 --format Pretty && echo '# clickhouse/exec.sh: benchmark sql script finished' || echo "# clickhouse/exec.sh: benchmark sql script for $SRC_DATANAME terminated with error" -elif [ $1 == 'join' ]; then - sed "s/DATA_NAME/$SRC_DATANAME/g; s/RHS_SMALL/$RHS1/g; s/RHS_MEDIUM/$RHS2/g; s/RHS_BIG/$RHS3/g" < "clickhouse/join-clickhouse.sql.in" > "clickhouse/join-clickhouse.sql" - cat "clickhouse/$1-clickhouse.sql" | clickhouse-client -mn --max_memory_usage $CH_MEM --receive_timeout 10800 --format Pretty && echo '# clickhouse/exec.sh: benchmark sql script finished' || echo "# clickhouse/exec.sh: benchmark sql script for $SRC_DATANAME terminated with error" +echo "# clickhouse/exec.sh: data loaded, logs truncated, runnning $1-$SRC_DATANAME benchmark sh script" +if [ $1 == 'groupby' ] || [ $1 == 'join' ]; then + RHS1="$RHS1" RHS2="$RHS2" RHS3="$RHS3" "./clickhouse/$1-clickhouse.sh" && echo '# clickhouse/exec.sh: benchmark sh script finished' || echo "# clickhouse/exec.sh: benchmark sh script for $SRC_DATANAME terminated with error" # RHS1..RHS3 are assigned above without export, so hand them to the child script explicitly (empty for groupby) else echo "clickhouse task $1 benchmark script launching not defined" >&2 && exit 1 fi diff --git a/clickhouse/groupby-clickhouse.sh b/clickhouse/groupby-clickhouse.sh new file mode 100755 index 00000000..3a008766 --- /dev/null +++ b/clickhouse/groupby-clickhouse.sh @@ -0,0 +1,75 @@ +source ./clickhouse/ch.sh + +SOLUTION=clickhouse +TASK=groupby + +# /* q1: question='sum v1 by id1' */ + +Q=1 +QUESTION="sum v1 by id1" +QUERY="SELECT id1, sum(v1) AS v1 FROM ${SRC_DATANAME} GROUP BY id1" + +ch_make_2_runs + +# /* q2: question='sum v1 by id1:id2' */ +Q=2 +QUESTION="sum v1 by id1:id2" +QUERY="SELECT id1, id2, sum(v1) AS v1 FROM ${SRC_DATANAME} GROUP BY id1, id2" + +ch_make_2_runs + +# /* q3: question='sum v1 mean v3 by id3' */ +Q=3 +QUESTION="sum v1 mean v3 
by id3" +QUERY="SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM ${SRC_DATANAME} GROUP BY id3" + +ch_make_2_runs + +# /* q4: question='mean v1:v3 by id4' */ +Q=4 +QUESTION="mean v1:v3 by id4" +QUERY="SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM ${SRC_DATANAME} GROUP BY id4" + +ch_make_2_runs + +# /* q5: question='sum v1:v3 by id6' */ +Q=5 +QUESTION="sum v1:v3 by id6" +QUERY="SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM ${SRC_DATANAME} GROUP BY id6" + +ch_make_2_runs + +# /* q6: question='median v3 sd v3 by id4 id5' */ +Q=6 +QUESTION="median v3 sd v3 by id4 id5" +QUERY="SELECT id4, id5, medianExact(v3) AS median_v3, stddevPop(v3) AS sd_v3 FROM ${SRC_DATANAME} GROUP BY id4, id5" + +ch_make_2_runs + +# /* q7: question='max v1 - min v2 by id3' */ +Q=7 +QUESTION="max v1 - min v2 by id3" +QUERY="SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM ${SRC_DATANAME} GROUP BY id3" + +ch_make_2_runs + +# /* q8: question='largest two v3 by id6' */ +Q=8 +QUESTION="largest two v3 by id6" +QUERY="SELECT id6, arrayJoin(arraySlice(arrayReverseSort(groupArray(v3)), 1, 2)) AS v3 FROM (SELECT id6, v3 FROM ${SRC_DATANAME} WHERE v3 IS NOT NULL) AS subq GROUP BY id6" + +ch_make_2_runs + +# /* q9: question='regression v1 v2 by id2 id4' */ +Q=9 +QUESTION="regression v1 v2 by id2 id4" +QUERY="SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM ${SRC_DATANAME} GROUP BY id2, id4" + +ch_make_2_runs + +# /* q10: question='sum v3 count by id1:id6' */ +Q=10 +QUESTION="sum v3 count by id1:id6" +QUERY="SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count() AS cnt FROM ${SRC_DATANAME} GROUP BY id1, id2, id3, id4, id5, id6" + +ch_make_2_runs diff --git a/clickhouse/groupby-clickhouse.sql.in b/clickhouse/groupby-clickhouse.sql.in deleted file mode 100644 index 3a594808..00000000 --- a/clickhouse/groupby-clickhouse.sql.in +++ /dev/null @@ -1,198 +0,0 @@ --- parent sh script 'sed' pre-processing to be removed once sql script will support variables: 
https://github.com/yandex/ClickHouse/issues/3815 ---data_name='DATA_NAME' ---task='groupby' ---fun='select group by' ---cache=1 -- #151 ---on_disk=1 - -SET aggregation_memory_efficient_merge_threads = 1; -- ClickHouse/ClickHouse#11773 - -/* q1: question='sum v1 by id1' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 by id1' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q1_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 by id1' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q1_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q2: 
question='sum v1 by id1:id2' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, id2, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1, id2; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 by id1:id2' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, id2, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1, id2;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q2_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, id2, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1, id2; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 by id1:id2' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, id2, sum(v1) AS v1 FROM DATA_NAME GROUP BY id1, id2;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q2_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q3: question='sum v1 mean v3 by id3' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM DATA_NAME GROUP BY id3; -SET log_queries = 0; -SYSTEM FLUSH 
LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 mean v3 by id3' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM DATA_NAME GROUP BY id3;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q3_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM DATA_NAME GROUP BY id3; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1 mean v3 by id3' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id3, sum(v1) AS v1, avg(v3) AS v3 FROM DATA_NAME GROUP BY id3;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q3_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q4: question='mean v1:v3 by id4' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM DATA_NAME GROUP BY id4; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'mean v1:v3 by id4' AS question, 
result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM DATA_NAME GROUP BY id4;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q4_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM DATA_NAME GROUP BY id4; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'mean v1:v3 by id4' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id4, avg(v1) AS v1, avg(v2) AS v2, avg(v3) AS v3 FROM DATA_NAME GROUP BY id4;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q4_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q5: question='sum v1:v3 by id6' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM DATA_NAME GROUP BY id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1:v3 by id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS 
fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM DATA_NAME GROUP BY id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q5_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM DATA_NAME GROUP BY id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v1:v3 by id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id6, sum(v1) AS v1, sum(v2) AS v2, sum(v3) AS v3 FROM DATA_NAME GROUP BY id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q5_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q6: question='median v3 sd v3 by id4 id5' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id4, id5, medianExact(v3) AS median_v3, stddevPop(v3) AS sd_v3 FROM DATA_NAME GROUP BY id4, id5; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'median v3 sd v3 by id4 id5' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, 
NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id4, id5, medianExact(v3) AS median_v3, stddevPop(v3) AS sd_v3 FROM DATA_NAME GROUP BY id4, id5;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q6_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id4, id5, medianExact(v3) AS median_v3, stddevPop(v3) AS sd_v3 FROM DATA_NAME GROUP BY id4, id5; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'median v3 sd v3 by id4 id5' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id4, id5, medianExact(v3) AS median_v3, stddevPop(v3) AS sd_v3 FROM DATA_NAME GROUP BY id4, id5;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q6_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q7: question='max v1 - min v2 by id3' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM DATA_NAME GROUP BY id3; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'max v1 - min v2 by id3' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM 
system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM DATA_NAME GROUP BY id3;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q7_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM DATA_NAME GROUP BY id3; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'max v1 - min v2 by id3' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id3, max(v1) - min(v2) AS range_v1_v2 FROM DATA_NAME GROUP BY id3;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q7_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q8: question='largest two v3 by id6' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id6, arrayJoin(arraySlice(arrayReverseSort(groupArray(v3)), 1, 2)) AS v3 FROM (SELECT id6, v3 FROM DATA_NAME WHERE v3 IS NOT NULL) AS subq GROUP BY id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'largest two v3 by id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = 
Memory AS SELECT id6, arrayJoin(arraySlice(arrayReverseSort(groupArray(v3)), 1, 2)) AS v3 FROM (SELECT id6, v3 FROM DATA_NAME WHERE v3 IS NOT NULL) AS subq GROUP BY id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q8_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id6, arrayJoin(arraySlice(arrayReverseSort(groupArray(v3)), 1, 2)) AS v3 FROM (SELECT id6, v3 FROM DATA_NAME WHERE v3 IS NOT NULL) AS subq GROUP BY id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'largest two v3 by id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id6, arrayJoin(arraySlice(arrayReverseSort(groupArray(v3)), 1, 2)) AS v3 FROM (SELECT id6, v3 FROM DATA_NAME WHERE v3 IS NOT NULL) AS subq GROUP BY id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q8_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q9: question='regression v1 v2 by id2 id4' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM DATA_NAME GROUP BY id2, id4; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'regression v1 v2 by id2 id4' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, 
NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM DATA_NAME GROUP BY id2, id4;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q9_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM DATA_NAME GROUP BY id2, id4; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'regression v1 v2 by id2 id4' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id2, id4, pow(corr(v1, v2), 2) AS r2 FROM DATA_NAME GROUP BY id2, id4;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q9_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q10: question='sum v3 count by id1:id6' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Log AS SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count() AS cnt FROM DATA_NAME GROUP BY id1, id2, id3, id4, id5, id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v3 count by id1:id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND 
query='CREATE TABLE ans ENGINE = Log AS SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count() AS cnt FROM DATA_NAME GROUP BY id1, id2, id3, id4, id5, id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q10_r1.csv' FORMAT CSVWithNames; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Log AS SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count() AS cnt FROM DATA_NAME GROUP BY id1, id2, id3, id4, id5, id6; -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'groupby' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'sum v3 count by id1:id6' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Log AS SELECT id1, id2, id3, id4, id5, id6, sum(v3) AS v3, count() AS cnt FROM DATA_NAME GROUP BY id1, id2, id3, id4, id5, id6;' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/groupby_DATA_NAME_q10_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; diff --git a/clickhouse/join-clickhouse.sh b/clickhouse/join-clickhouse.sh new file mode 100755 index 00000000..df6fae0d --- /dev/null +++ b/clickhouse/join-clickhouse.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -e +set -x +source ./clickhouse/ch.sh + +SOLUTION=clickhouse +TASK=join + +echo SRC ${SRC_DATANAME} RHS1 ${RHS1} RHS2 ${RHS2} RHS3 ${RHS3} + +# /* q1: question='small inner on int' */ +Q=1 +QUESTION="small inner on int" +QUERY="SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM ${SRC_DATANAME} AS x INNER JOIN ${RHS1} AS y USING (id1)" +ch_make_2_runs + +# /* q2: question='medium inner on int' */ +Q=2 +QUESTION="medium inner on int" +QUERY="SELECT x.id1, y.id1, id2, 
x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM ${SRC_DATANAME} AS x INNER JOIN ${RHS2} AS y USING (id2)" +ch_make_2_runs + +# /* q3: question='medium outer on int' */ +Q=3 +QUESTION="medium outer on int" +QUERY="SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM ${SRC_DATANAME} AS x LEFT JOIN ${RHS2} AS y USING (id2)" +ch_make_2_runs + +# /* q4: question='medium inner on factor' */ +Q=4 +QUESTION="medium inner on factor" +QUERY="SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM ${SRC_DATANAME} AS x INNER JOIN ${RHS2} AS y USING (id5)" +ch_make_2_runs + +# /* q5: question='big inner on int' */ +Q=5 +QUESTION="big inner on int" +QUERY="SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM ${SRC_DATANAME} AS x INNER JOIN ${RHS3} AS y USING (id3)" +ch_make_2_runs diff --git a/clickhouse/join-clickhouse.sql.in b/clickhouse/join-clickhouse.sql.in deleted file mode 100644 index 49adb474..00000000 --- a/clickhouse/join-clickhouse.sql.in +++ /dev/null @@ -1,107 +0,0 @@ --- parent sh script 'sed' pre-processing to be removed once sql script will support variables: https://github.com/yandex/ClickHouse/issues/3815 ---data_name='DATA_NAME' ---task='join' ---fun='select join' ---cache=1 -- #151 ---on_disk=1 --- we list all columns in select statement rather using select * due to #167 - -/* q1: question='small inner on int' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'small inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, 
memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q1_r1.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'small inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT id1, x.id2, x.id3, x.id4, y.id4, x.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_SMALL AS y USING (id1);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q1_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q2: question='medium inner on int' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS 
git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q2_r1.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q2_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q3: question='medium outer on int' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium outer on int' AS 
question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q3_r1.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium outer on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, id2, x.id3, x.id4, y.id4, x.id5, y.id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x LEFT JOIN RHS_MEDIUM AS y USING (id2);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q3_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q4: question='medium inner on factor' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, 
toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on factor' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q4_r1.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'medium inner on factor' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, x.id3, x.id4, y.id4, id5, x.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_MEDIUM AS y USING (id5);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q4_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -/* q5: question='big inner on int' */ - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, 
y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 1 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'big inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q5_r1.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; - -SET log_queries = 1; -CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3); -SET log_queries = 0; -SYSTEM FLUSH LOGS; -SELECT 2 AS run, toUnixTimestamp(now()) AS timestamp, 'join' AS task, 'DATA_NAME' AS data_name, NULL AS in_rows, 'big inner on int' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select join' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk -FROM system.query_log WHERE type=2 AND query='CREATE TABLE ans ENGINE = Memory AS SELECT x.id1, y.id1, x.id2, y.id2, id3, x.id4, y.id4, x.id5, y.id5, x.id6, y.id6, x.v1, y.v2 FROM DATA_NAME AS x INNER JOIN RHS_BIG AS y USING (id3);' ORDER BY query_start_time DESC LIMIT 1 INTO OUTFILE 'clickhouse/log/join_DATA_NAME_q5_r2.csv' FORMAT CSVWithNames; -SELECT * FROM ans LIMIT 3; -DROP TABLE ans; diff --git a/clickhouse/setup-clickhouse.sh 
b/clickhouse/setup-clickhouse.sh index 4c6e87cc..71f05524 100755 --- a/clickhouse/setup-clickhouse.sh +++ b/clickhouse/setup-clickhouse.sh @@ -1,4 +1,3 @@ - # install sudo apt-get install -y apt-transport-https ca-certificates dirmngr sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 @@ -8,25 +7,13 @@ sudo apt-get update sudo apt-get install -y clickhouse-server clickhouse-client +# stop server if service was already running +sudo service clickhouse-server start ||: + # start server sudo rm /var/log/clickhouse-server/clickhouse-server.err.log /var/log/clickhouse-server/clickhouse-server.log sudo service clickhouse-server start -# stop server -#sudo service clickhouse-server stop - -# let file table function access csv -- NO LONGER NECESSARY -# grep '/var/lib/clickhouse/user_files/' /etc/clickhouse-server/config.xml -# sudo sed -i -e "s|/home/ubuntu/|/var/lib/clickhouse/user_files/|" /etc/clickhouse-server/config.xml -# sudo grep 'user_files_path' /etc/clickhouse-server/config.xml - -# server start/stop without sudo: use visudo to edit sudoers -#sudo cp /etc/sudoers etc_sudoers.bak -#sudo EDITOR=vim visudo -# add two lines for your user at the end of section: Members of the admin group may gain root privileges -#user ALL=NOPASSWD: /usr/sbin/service clickhouse-server start -#user ALL=NOPASSWD: /usr/sbin/service clickhouse-server stop - # interactive debugging # copy exec.sh body and substitute $1 for groupby and $2 for G1_1e7_1e2_0_0, avoid exit calls \ No newline at end of file From b5ce53ea581a2b157d2a7d425c194b90c5c57c70 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 22 Nov 2023 07:36:08 +0000 Subject: [PATCH 2/9] Update ClickHouse results --- _benchplot/benchplot-dict.R | 2 - _launcher/launcher.R | 3 +- _launcher/solution.R | 3 +- clickhouse/VERSION | 2 +- clickhouse/ch.sh | 31 ++- clickhouse/exec.sh | 93 +++++++-- clickhouse/join-clickhouse.sh | 2 +- logs.csv | 44 ++++ run.sh | 3 
+- time.csv | 370 ++++++++++++++++++++++++++++++++++ 10 files changed, 519 insertions(+), 34 deletions(-) diff --git a/_benchplot/benchplot-dict.R b/_benchplot/benchplot-dict.R index 6ac2df8a..c834fa6b 100644 --- a/_benchplot/benchplot-dict.R +++ b/_benchplot/benchplot-dict.R @@ -490,8 +490,6 @@ join.data.exceptions = {list( "juliads" = {list( )}, "clickhouse" = {list( - "out of memory" = c("J1_1e9_NA_0_0", # q1 r2 #169 - "J1_1e9_NA_5_0","J1_1e9_NA_0_1") # q1 r1 )}, "polars" = {list( "out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") diff --git a/_launcher/launcher.R b/_launcher/launcher.R index 167d9dee..db5b1a83 100644 --- a/_launcher/launcher.R +++ b/_launcher/launcher.R @@ -16,8 +16,7 @@ file.ext = function(x) { x, "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R", "pandas"=, "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py", - "clickhouse"="sql", - "juliadf"="jl", "juliads"="jl", + "clickhouse"="sh", "juliadf"="jl", "juliads"="jl", ) if (is.null(ans)) stop(sprintf("solution %s does not have file extension defined in file.ext helper function", x)) ans diff --git a/_launcher/solution.R b/_launcher/solution.R index f66b4311..530999aa 100755 --- a/_launcher/solution.R +++ b/_launcher/solution.R @@ -112,8 +112,7 @@ file.ext = function(x) { x, "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R", "pandas"="py", "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py", - "clickhouse"="sql", - "juliadf"="jl", "juliads"="jl" + "clickhouse"="sh", "juliadf"="jl", "juliads"="jl" ) if (is.null(ans)) stop(sprintf("solution %s does not have file extension defined in file.ext helper function", x)) ans diff --git a/clickhouse/VERSION b/clickhouse/VERSION index ed8b0e7d..396a5968 100644 --- a/clickhouse/VERSION +++ b/clickhouse/VERSION @@ -1 +1 @@ -23.9.1.1854 \ No newline at end of file +23.10.2.13 \ No newline at end of file diff --git 
a/clickhouse/ch.sh b/clickhouse/ch.sh index 8083067f..14075e32 100644 --- a/clickhouse/ch.sh +++ b/clickhouse/ch.sh @@ -1,32 +1,47 @@ ch_installed() { dpkg-query -Wf'${db:Status-abbrev}' clickhouse-server 2>/dev/null | grep -q '^i' } + +ch_active() { + clickhouse-client --query="SELECT 0;" > /dev/null 2>&1 + local ret=$?; + if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; +} + +ch_wait() { + for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done + ch_active +} + ch_start() { echo '# ch_start: starting clickhouse-server' - sudo service clickhouse-server start && sleep 15 + sudo service clickhouse-server start + ch_wait } + ch_stop() { echo '# ch_stop: stopping clickhouse-server' sudo service clickhouse-server stop && sleep 15 } -ch_active() { - clickhouse-client --query="SELECT 0;" > /dev/null 2>&1 - local ret=$?; - if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; -} ch_query() { + ENGINE=Memory + if [ $ON_DISK -eq 1 ]; then + ENGINE="MergeTree ORDER BY tuple()" + fi + sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table' clickhouse-client --query "DROP TABLE IF EXISTS ans;" - clickhouse-client --log_comment ${RUNNAME} --query "CREATE TABLE ans ENGINE = Memory AS ${QUERY};" + clickhouse-client --log_comment ${RUNNAME} --query "CREATE TABLE ans ENGINE = ${ENGINE} AS ${QUERY} SETTINGS max_insert_threads=${THREADS}, max_threads=${THREADS};" local ret=$?; if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; clickhouse-client --query "SELECT * FROM ans LIMIT 3;" + sudo touch '/var/lib/clickhouse/flags/force_drop_table' && 
sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table' clickhouse-client --query "DROP TABLE ans;" } ch_logrun() { clickhouse-client --query "SYSTEM FLUSH LOGS;" - clickhouse-client --query "SELECT ${RUN} AS run, toUnixTimestamp(now()) AS timestamp, '${TASK}' AS task, '${SRC_DATANAME}' AS data_name, NULL AS in_rows, '${QUESTION}' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, 'select group by' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk FROM system.query_log WHERE type='QueryFinish' AND log_comment='${RUNNAME}' ORDER BY query_start_time DESC LIMIT 1 FORMAT CSVWithNames;" > clickhouse/log/${RUNNAME}.csv + clickhouse-client --query "SELECT ${RUN} AS run, toUnixTimestamp(now()) AS timestamp, '${TASK}' AS task, '${SRC_DATANAME}' AS data_name, NULL AS in_rows, '${QUESTION}' AS question, result_rows AS out_rows, NULL AS out_cols, 'clickhouse' AS solution, version() AS version, NULL AS git, '${FUNCTION}' AS fun, query_duration_ms/1000 AS time_sec, memory_usage/1073741824 AS mem_gb, 1 AS cache, NULL AS chk, NULL AS chk_time_sec, 1 AS on_disk FROM system.query_log WHERE type='QueryFinish' AND log_comment='${RUNNAME}' ORDER BY query_start_time DESC LIMIT 1 FORMAT CSVWithNames;" > clickhouse/log/${RUNNAME}.csv local ret=$?; if [[ $ret -eq 0 ]]; then return 0; elif [[ $ret -eq 210 ]]; then return 1; else echo "Unexpected return code from clickhouse-client: $ret" >&2 && return 1; fi; } diff --git a/clickhouse/exec.sh b/clickhouse/exec.sh index 95e50d10..cc411dd8 100755 --- a/clickhouse/exec.sh +++ b/clickhouse/exec.sh @@ -25,36 +25,92 @@ ch_active || exit 1 # tail -n+2 data/G1_1e7_1e2_0_0.csv | clickhouse-client --query="INSERT INTO G1_1e7_1e2_0_0 SELECT * FROM input('id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 
Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)') FORMAT CSV" # tune CH settings and load data +sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table' clickhouse-client --query 'DROP TABLE IF EXISTS ans' echo '# clickhouse/exec.sh: creating tables and loading data' +# set ClickHouse parallelism at half of virtual cores +THREADS=$(($(nproc --all) /2)) +HAS_NULL=$(clickhouse-client --query "SELECT splitByChar('_','$SRC_DATANAME')[4]>0 FORMAT TSV") +IS_SORTED=$(clickhouse-client --query "SELECT splitByChar('_','$SRC_DATANAME')[5]=1 FORMAT TSV") +ON_DISK=0 + if [ $1 == 'groupby' ]; then clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" - clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + if [ $HAS_NULL -eq 1 ]; then + if [ $IS_SORTED -eq 1 ]; then + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(Nullable(String)), id2 LowCardinality(Nullable(String)), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6,v1,v2,v3);" + else + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(Nullable(String)), id2 LowCardinality(Nullable(String)), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + fi + else + if [ $IS_SORTED -eq 1 ]; then + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(String), id2 LowCardinality(String), id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = 
MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6,v1,v2,v3);" + else + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(String), id2 LowCardinality(String), id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY tuple();" + fi + fi clickhouse-client --query "INSERT INTO $SRC_DATANAME FROM INFILE 'data/${SRC_DATANAME}.csv'" # confirm all data loaded echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + export THREADS elif [ $1 == 'join' ]; then - # lhs - clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" - clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS=$(join_to_tbls $SRC_DATANAME) RHS1=$(echo $RHS | cut -d' ' -f1) - clickhouse-client --query "DROP TABLE IF EXISTS $RHS1" - clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$RHS1.csv 
| clickhouse-client --query "INSERT INTO $RHS1 SELECT * FROM input('id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS1'\n$(echo $RHS1 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS2=$(echo $RHS | cut -d' ' -f2) - clickhouse-client --query "DROP TABLE IF EXISTS $RHS2" - clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" - tail -n+2 data/$RHS2.csv | clickhouse-client --query "INSERT INTO $RHS2 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS2'\n$(echo $RHS2 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' RHS3=$(echo $RHS | cut -d' ' -f3) + ON_DISK=$(clickhouse-client --query "SELECT (splitByChar('_','$SRC_DATANAME')[2])::Float32 >= 1e9::Float32 FORMAT TSV") + + # cleanup + clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" + clickhouse-client --query "DROP TABLE IF EXISTS $RHS1" + clickhouse-client --query "DROP TABLE IF EXISTS $RHS2" clickhouse-client --query "DROP TABLE IF EXISTS $RHS3" - clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + + echo IS_SORTED ${IS_SORTED} HAS_NULL ${HAS_NULL} ON_DISK ${ON_DISK} + # schemas + if [ $HAS_NULL -eq 1 
]; then + if [ $IS_SORTED -eq 1 ]; then + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);" + clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id4);" + clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);" + clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);" + else + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 LowCardinality(Nullable(String)), id5 LowCardinality(Nullable(String)), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER 
BY tuple();" + fi + else + if [ $IS_SORTED -eq 1 ]; then + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);" + clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id4);" + clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 LowCardinality(String), id5 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id4, id5);" + clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY (id1, id2, id3, id4, id5, id6);" + else + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v1 Float64) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS1 (id1 Int32, id4 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS2 (id1 Int32, id2 Int32, id4 LowCardinality(String), id5 LowCardinality(String), v2 Float64) ENGINE = MergeTree() ORDER BY tuple();" + clickhouse-client --query "CREATE TABLE $RHS3 (id1 Int32, id2 Int32, id3 Int32, id4 LowCardinality(String), id5 LowCardinality(String), id6 String, v2 Float64) ENGINE = MergeTree() ORDER BY tuple();" + fi + fi + + # insert + tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$RHS1.csv | clickhouse-client --query "INSERT INTO $RHS1 SELECT * FROM input('id1 
Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" + tail -n+2 data/$RHS2.csv | clickhouse-client --query "INSERT INTO $RHS2 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" tail -n+2 data/$RHS3.csv | clickhouse-client --query "INSERT INTO $RHS3 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" + + # validate + echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS1'\n$(echo $RHS1 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS2'\n$(echo $RHS2 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS3'\n$(echo $RHS3 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + + export RHS1 + export RHS2 + export RHS3 + export ON_DISK + export THREADS else echo "clickhouse task $1 not implemented" >&2 && exit 1 fi @@ -76,7 +132,12 @@ fi sleep 90 # cleanup data -ch_active && echo '# 
clickhouse/exec.sh: finishing, cleaning up' && clickhouse-client --query "DROP TABLE IF EXISTS ans" && clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, possibly crashed, could not clean up' +sudo touch '/var/lib/clickhouse/flags/force_drop_table' && sudo chmod 666 '/var/lib/clickhouse/flags/force_drop_table' +ch_active && echo '# clickhouse/exec.sh: finishing, cleaning up' && clickhouse-client --query "DROP TABLE IF EXISTS ans" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, could not clean up' +ch_active && clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, could not clean up' +ch_active && clickhouse-client --query "DROP TABLE IF EXISTS $RHS1" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, could not clean up' +ch_active && clickhouse-client --query "DROP TABLE IF EXISTS $RHS2" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, could not clean up' +ch_active && clickhouse-client --query "DROP TABLE IF EXISTS $RHS3" || echo '# clickhouse/exec.sh: finishing, clickhouse server down, could not clean up' # stop server ch_stop && echo '# clickhouse/exec.sh: stopping server finished' || echo '# clickhouse/exec.sh: stopping server failed' diff --git a/clickhouse/join-clickhouse.sh b/clickhouse/join-clickhouse.sh index df6fae0d..4b9841ec 100755 --- a/clickhouse/join-clickhouse.sh +++ b/clickhouse/join-clickhouse.sh @@ -6,7 +6,7 @@ source ./clickhouse/ch.sh SOLUTION=clickhouse TASK=join -echo SRC ${SRC_DATANAME} RHS1 ${RHS1} RHS2 ${RHS2} RHS3 ${RHS3} +echo SRC ${SRC_DATANAME} RHS1 ${RHS1} RHS2 ${RHS2} RHS3 ${RHS3} ON_DISK ${ON_DISK} THREADS ${THREADS} # /* q1: question='small inner on int' */ Q=1 diff --git a/logs.csv b/logs.csv index 1775ca19..0e6fb133 100644 --- a/logs.csv +++ b/logs.csv @@ -899,3 +899,47 @@ 
ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309325.24506, ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309934.66574,finish,1,137 ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699437325.61783,start,, ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699451725.72193,finish,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_0_0,1700587791.42537,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_0_0,1700587938.58807,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e1_0_0,1700587953.60331,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e1_0_0,1700588100.60355,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_2e0_0_0,1700588115.61882,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_2e0_0_0,1700588261.57888,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_0_1,1700588276.59411,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_0_1,1700588423.54863,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_5_0,1700588438.56387,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e7_1e2_5_0,1700588585.52743,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_0_0,1700588600.54268,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_0_0,1700588768.08545,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e1_0_0,1700588783.10067,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e1_0_0,1700588953.42058,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_2e0_0_0,1700588968.43582,start,, 
+ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_2e0_0_0,1700589140.05388,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_0_1,1700589155.06912,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_0_1,1700589323.62503,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_5_0,1700589338.64027,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e8_1e2_5_0,1700589511.2378,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_0_0,1700589526.25303,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_0_0,1700589923.66532,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e1_0_0,1700589938.68043,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e1_0_0,1700590367.76374,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_2e0_0_0,1700590382.77899,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_2e0_0_0,1700590849.19496,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_0_1,1700590864.2102,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_0_1,1700591277.14579,finish,131,0 +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_5_0,1700591292.16105,start,, +ip-172-31-38-198,1700587790,clickhouse,23.10.2.13,,groupby,G1_1e9_1e2_5_0,1700591759.14913,finish,131,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_0_0,1700600133.70651,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_0_0,1700600286.62925,finish,374,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_5_0,1700600301.64177,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_5_0,1700600454.73575,finish,374,0 
+ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_0_1,1700600469.75086,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e7_NA_0_1,1700600619.6822,finish,374,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_0_0,1700600634.69741,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_0_0,1700600898.70815,finish,374,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_5_0,1700600913.72327,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_5_0,1700601185.09986,finish,374,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_0_1,1700601200.1151,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e8_NA_0_1,1700601443.93793,finish,374,0 +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e9_NA_0_0,1700601458.95318,start,, +ip-172-31-38-198,1700600133,clickhouse,23.10.2.13,,join,J1_1e9_NA_0_0,1700603770.91887,finish,384,0 diff --git a/run.sh b/run.sh index 8afc679c..6573d764 100755 --- a/run.sh +++ b/run.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e - ## run script the following way to exit if benchmark is already running #if [[ -f ./run.lock ]]; then echo "# Benchmark run discarded due to previous run $(cat run.lock) still running" > "./run_discarded_at_$(date +%s).out"; else ./run.sh > ./run.out; fi; @@ -18,7 +17,7 @@ pidof java > /dev/null 2>&1 && echo "# Benchmark run $BATCH aborted. java is run # confirm clickhouse is not running source ./clickhouse/ch.sh -ch_installed && ch_active && echo "# Benchmark run $BATCH aborted. 
clickhouse-server is running, shut it down before calling 'run.sh'" && exit; +ch_installed && ch_active && ch_stop if [[ $IGNORE_SWAP == true ]] diff --git a/time.csv b/time.csv index ab100aa0..45d26e9b 100644 --- a/time.csv +++ b/time.csv @@ -6267,3 +6267,373 @@ ip-172-31-31-147,1699289348,1699301553,join,J1_1e9_NA_0_0,1000000000,small inner ip-172-31-31-147,1699289348,1699309384,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,1,9.688,NA,TRUE,44998904641;45286789554,1.74,NA,FALSE ip-172-31-31-147,1699289348,1699309393,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,2,6.281,NA,TRUE,44998904641;45286789554,1.965,NA,FALSE ip-172-31-31-147,1699437325,1699447786,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,18.94,NA,TRUE,44998904641;45286789554,3.059,NA,FALSE +ip-172-31-38-198,1700587790,1700587795,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.017,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587795,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.013,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587795,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.056,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587795,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.103,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587796,groupby,G1_1e7_1e2_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.09,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587796,groupby,G1_1e7_1e2_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.071,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587796,groupby,G1_1e7_1e2_0_0,10000000,mean v1:v3 by 
id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.024,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587796,groupby,G1_1e7_1e2_0_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.022,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587796,groupby,G1_1e7_1e2_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.059,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587797,groupby,G1_1e7_1e2_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.059,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587797,groupby,G1_1e7_1e2_0_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.048,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587797,groupby,G1_1e7_1e2_0_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.045,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587797,groupby,G1_1e7_1e2_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.072,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587798,groupby,G1_1e7_1e2_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.067,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587798,groupby,G1_1e7_1e2_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.075,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587798,groupby,G1_1e7_1e2_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.068,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587798,groupby,G1_1e7_1e2_0_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.053,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587798,groupby,G1_1e7_1e2_0_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.05,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587799,groupby,G1_1e7_1e2_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.292,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700587799,groupby,G1_1e7_1e2_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.266,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587957,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.017,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587957,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.012,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587957,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.05,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587957,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.057,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587957,groupby,G1_1e7_1e1_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.124,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587958,groupby,G1_1e7_1e1_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.111,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587958,groupby,G1_1e7_1e1_0_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.021,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587958,groupby,G1_1e7_1e1_0_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.021,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587958,groupby,G1_1e7_1e1_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.088,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587959,groupby,G1_1e7_1e1_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.077,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587959,groupby,G1_1e7_1e1_0_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.031,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587959,groupby,G1_1e7_1e1_0_0,10000000,median v3 sd v3 by id4 
id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.036,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587959,groupby,G1_1e7_1e1_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.096,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587960,groupby,G1_1e7_1e1_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.098,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587960,groupby,G1_1e7_1e1_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.105,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587960,groupby,G1_1e7_1e1_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.089,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587960,groupby,G1_1e7_1e1_0_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.027,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587960,groupby,G1_1e7_1e1_0_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.027,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587961,groupby,G1_1e7_1e1_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.259,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700587961,groupby,G1_1e7_1e1_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.247,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588119,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.015,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588119,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.011,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588119,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.04,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588119,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.051,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700588120,groupby,G1_1e7_2e0_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.147,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588120,groupby,G1_1e7_2e0_0_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.128,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588120,groupby,G1_1e7_2e0_0_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.022,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588120,groupby,G1_1e7_2e0_0_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.022,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588120,groupby,G1_1e7_2e0_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.096,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588121,groupby,G1_1e7_2e0_0_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.09,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588121,groupby,G1_1e7_2e0_0_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.058,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588121,groupby,G1_1e7_2e0_0_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.056,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588121,groupby,G1_1e7_2e0_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.113,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588122,groupby,G1_1e7_2e0_0_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.107,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588122,groupby,G1_1e7_2e0_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.119,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588122,groupby,G1_1e7_2e0_0_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.099,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588122,groupby,G1_1e7_2e0_0_0,10000000,regression v1 v2 by id2 
id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.039,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588122,groupby,G1_1e7_2e0_0_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.034,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588123,groupby,G1_1e7_2e0_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.246,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588123,groupby,G1_1e7_2e0_0_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.24,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588280,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.016,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588280,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.012,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588280,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.048,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588280,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.051,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588281,groupby,G1_1e7_1e2_0_1,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.087,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588281,groupby,G1_1e7_1e2_0_1,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.07,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588281,groupby,G1_1e7_1e2_0_1,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.022,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588281,groupby,G1_1e7_1e2_0_1,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.02,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588281,groupby,G1_1e7_1e2_0_1,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.057,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700588282,groupby,G1_1e7_1e2_0_1,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.048,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588282,groupby,G1_1e7_1e2_0_1,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.043,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588282,groupby,G1_1e7_1e2_0_1,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.044,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588282,groupby,G1_1e7_1e2_0_1,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.068,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588283,groupby,G1_1e7_1e2_0_1,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.063,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588283,groupby,G1_1e7_1e2_0_1,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.068,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588283,groupby,G1_1e7_1e2_0_1,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.063,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588283,groupby,G1_1e7_1e2_0_1,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.037,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588283,groupby,G1_1e7_1e2_0_1,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.035,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588284,groupby,G1_1e7_1e2_0_1,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.275,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588284,groupby,G1_1e7_1e2_0_1,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.245,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588442,groupby,G1_1e7_1e2_5_0,10000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.021,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588442,groupby,G1_1e7_1e2_5_0,10000000,sum v1 by 
id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.014,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588442,groupby,G1_1e7_1e2_5_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.062,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588443,groupby,G1_1e7_1e2_5_0,10000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.07,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588443,groupby,G1_1e7_1e2_5_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.096,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588443,groupby,G1_1e7_1e2_5_0,10000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.081,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588443,groupby,G1_1e7_1e2_5_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.025,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588443,groupby,G1_1e7_1e2_5_0,10000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.025,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588444,groupby,G1_1e7_1e2_5_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.063,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588444,groupby,G1_1e7_1e2_5_0,10000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.066,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588444,groupby,G1_1e7_1e2_5_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.054,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588444,groupby,G1_1e7_1e2_5_0,10000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.053,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588444,groupby,G1_1e7_1e2_5_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.08,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588445,groupby,G1_1e7_1e2_5_0,10000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.07,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700588445,groupby,G1_1e7_1e2_5_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.127,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588445,groupby,G1_1e7_1e2_5_0,10000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.124,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588445,groupby,G1_1e7_1e2_5_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.053,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588446,groupby,G1_1e7_1e2_5_0,10000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.053,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588446,groupby,G1_1e7_1e2_5_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.312,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588447,groupby,G1_1e7_1e2_5_0,10000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.256,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588616,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.041,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588616,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.034,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588617,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.223,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588617,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.228,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588618,groupby,G1_1e8_1e2_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.56,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588618,groupby,G1_1e8_1e2_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.48,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588619,groupby,G1_1e8_1e2_0_0,100000000,mean v1:v3 by 
id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.078,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588619,groupby,G1_1e8_1e2_0_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.076,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588619,groupby,G1_1e8_1e2_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.418,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588620,groupby,G1_1e8_1e2_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.328,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588620,groupby,G1_1e8_1e2_0_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.239,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588621,groupby,G1_1e8_1e2_0_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.23,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588621,groupby,G1_1e8_1e2_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.443,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588622,groupby,G1_1e8_1e2_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.459,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588623,groupby,G1_1e8_1e2_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.612,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588623,groupby,G1_1e8_1e2_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.502,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588624,groupby,G1_1e8_1e2_0_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.212,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588624,groupby,G1_1e8_1e2_0_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.206,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588626,groupby,G1_1e8_1e2_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.007,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700588628,groupby,G1_1e8_1e2_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.756,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588800,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.044,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588800,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.037,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588801,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.414,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588801,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.176,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588802,groupby,G1_1e8_1e1_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.94,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588803,groupby,G1_1e8_1e1_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.758,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588803,groupby,G1_1e8_1e1_0_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.072,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588803,groupby,G1_1e8_1e1_0_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.074,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588804,groupby,G1_1e8_1e1_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.574,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588805,groupby,G1_1e8_1e1_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.533,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588805,groupby,G1_1e8_1e1_0_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.157,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588806,groupby,G1_1e8_1e1_0_0,100000000,median v3 sd v3 by id4 
id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.129,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588806,groupby,G1_1e8_1e1_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.719,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588807,groupby,G1_1e8_1e1_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.691,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588808,groupby,G1_1e8_1e1_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.816,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588809,groupby,G1_1e8_1e1_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.73,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588809,groupby,G1_1e8_1e1_0_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.118,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588810,groupby,G1_1e8_1e1_0_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.107,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588812,groupby,G1_1e8_1e1_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.837,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588814,groupby,G1_1e8_1e1_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.796,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588985,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.044,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588985,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.037,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588985,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.436,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588986,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.159,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700588987,groupby,G1_1e8_2e0_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.068,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588988,groupby,G1_1e8_2e0_0_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.866,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588988,groupby,G1_1e8_2e0_0_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.078,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588988,groupby,G1_1e8_2e0_0_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.071,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588989,groupby,G1_1e8_2e0_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.723,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588990,groupby,G1_1e8_2e0_0_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.605,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588991,groupby,G1_1e8_2e0_0_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.435,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588991,groupby,G1_1e8_2e0_0_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.427,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588992,groupby,G1_1e8_2e0_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.876,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588993,groupby,G1_1e8_2e0_0_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.822,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588994,groupby,G1_1e8_2e0_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.895,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588995,groupby,G1_1e8_2e0_0_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.777,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588996,groupby,G1_1e8_2e0_0_0,100000000,regression v1 v2 by id2 
id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.432,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588996,groupby,G1_1e8_2e0_0_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.342,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700588998,groupby,G1_1e8_2e0_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.864,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589000,groupby,G1_1e8_2e0_0_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.729,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589172,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.047,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589173,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.036,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589173,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.268,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589173,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.154,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589174,groupby,G1_1e8_1e2_0_1,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.582,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589175,groupby,G1_1e8_1e2_0_1,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.496,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589175,groupby,G1_1e8_1e2_0_1,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.078,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589175,groupby,G1_1e8_1e2_0_1,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.079,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589176,groupby,G1_1e8_1e2_0_1,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.422,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700589176,groupby,G1_1e8_1e2_0_1,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.349,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589177,groupby,G1_1e8_1e2_0_1,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.239,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589177,groupby,G1_1e8_1e2_0_1,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.225,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589178,groupby,G1_1e8_1e2_0_1,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.423,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589178,groupby,G1_1e8_1e2_0_1,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.411,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589179,groupby,G1_1e8_1e2_0_1,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.617,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589180,groupby,G1_1e8_1e2_0_1,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.493,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589180,groupby,G1_1e8_1e2_0_1,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.12,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589180,groupby,G1_1e8_1e2_0_1,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.123,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589182,groupby,G1_1e8_1e2_0_1,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.99,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589184,groupby,G1_1e8_1e2_0_1,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.75,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589356,groupby,G1_1e8_1e2_5_0,100000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.063,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589356,groupby,G1_1e8_1e2_5_0,100000000,sum v1 
by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.046,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589357,groupby,G1_1e8_1e2_5_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.244,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589357,groupby,G1_1e8_1e2_5_0,100000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.233,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589358,groupby,G1_1e8_1e2_5_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.619,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589358,groupby,G1_1e8_1e2_5_0,100000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.535,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589359,groupby,G1_1e8_1e2_5_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.099,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589359,groupby,G1_1e8_1e2_5_0,100000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.093,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589360,groupby,G1_1e8_1e2_5_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.515,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589360,groupby,G1_1e8_1e2_5_0,100000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.426,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589361,groupby,G1_1e8_1e2_5_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.263,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589361,groupby,G1_1e8_1e2_5_0,100000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.25,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589362,groupby,G1_1e8_1e2_5_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.498,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589362,groupby,G1_1e8_1e2_5_0,100000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.498,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700589364,groupby,G1_1e8_1e2_5_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.472,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589366,groupby,G1_1e8_1e2_5_0,100000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.389,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589366,groupby,G1_1e8_1e2_5_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.242,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589366,groupby,G1_1e8_1e2_5_0,100000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.221,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589369,groupby,G1_1e8_1e2_5_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.214,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589371,groupby,G1_1e8_1e2_5_0,100000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.856,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589668,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.249,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589668,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.207,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589670,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.531,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589672,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.477,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589678,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,6.656,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589684,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,5.809,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589685,groupby,G1_1e9_1e2_0_0,1000000000,mean v1:v3 by 
id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.476,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589686,groupby,G1_1e9_1e2_0_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.466,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589691,groupby,G1_1e9_1e2_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,4.835,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589696,groupby,G1_1e9_1e2_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,4.791,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589698,groupby,G1_1e9_1e2_0_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.78,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589700,groupby,G1_1e9_1e2_0_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.764,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589706,groupby,G1_1e9_1e2_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,5.686,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589711,groupby,G1_1e9_1e2_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,5.215,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589718,groupby,G1_1e9_1e2_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,6.287,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589724,groupby,G1_1e9_1e2_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,6.464,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589727,groupby,G1_1e9_1e2_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.249,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589729,groupby,G1_1e9_1e2_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.154,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700589755,groupby,G1_1e9_1e2_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,25.58,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700589782,groupby,G1_1e9_1e2_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,26.975,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590084,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.271,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590084,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.214,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590088,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,3.828,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590089,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.28,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590101,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,10.896,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590112,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,10.58,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590113,groupby,G1_1e9_1e1_0_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.9,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590113,groupby,G1_1e9_1e1_0_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.562,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590122,groupby,G1_1e9_1e1_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,8.199,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590130,groupby,G1_1e9_1e1_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,7.628,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590131,groupby,G1_1e9_1e1_0_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.328,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590133,groupby,G1_1e9_1e1_0_0,1000000000,median v3 sd v3 by id4 
id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.325,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590142,groupby,G1_1e9_1e1_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,9.059,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590152,groupby,G1_1e9_1e1_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,9.269,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590162,groupby,G1_1e9_1e1_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,9.504,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590171,groupby,G1_1e9_1e1_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,9.583,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590173,groupby,G1_1e9_1e1_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.008,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590174,groupby,G1_1e9_1e1_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.203,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590200,groupby,G1_1e9_1e1_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,25.456,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590227,groupby,G1_1e9_1e1_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,26.396,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590534,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.27,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590535,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.239,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590538,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.984,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590539,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.958,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700590551,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,11.987,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590563,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,10.964,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590564,groupby,G1_1e9_2e0_0_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.048,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590565,groupby,G1_1e9_2e0_0_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.617,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590574,groupby,G1_1e9_2e0_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,8.989,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590583,groupby,G1_1e9_2e0_0_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,8.933,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590588,groupby,G1_1e9_2e0_0_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,4.055,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590592,groupby,G1_1e9_2e0_0_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,4.249,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590605,groupby,G1_1e9_2e0_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,12.34,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590616,groupby,G1_1e9_2e0_0_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,10.773,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590626,groupby,G1_1e9_2e0_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,10.259,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590639,groupby,G1_1e9_2e0_0_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,12.639,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700590643,groupby,G1_1e9_2e0_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,3.298,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590645,groupby,G1_1e9_2e0_0_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.403,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590677,groupby,G1_1e9_2e0_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,25.919,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700590707,groupby,G1_1e9_2e0_0_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,25.457,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591024,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.231,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591024,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.199,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591026,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.328,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591027,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.871,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591033,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,6.136,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591039,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,5.838,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591040,groupby,G1_1e9_1e2_0_1,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.51,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591040,groupby,G1_1e9_1e2_0_1,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.507,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591046,groupby,G1_1e9_1e2_0_1,1000000000,sum v1:v3 by 
id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,4.963,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591051,groupby,G1_1e9_1e2_0_1,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,4.982,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591053,groupby,G1_1e9_1e2_0_1,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.795,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591055,groupby,G1_1e9_1e2_0_1,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.764,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591061,groupby,G1_1e9_1e2_0_1,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,5.633,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591066,groupby,G1_1e9_1e2_0_1,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,5.392,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591074,groupby,G1_1e9_1e2_0_1,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,7.846,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591081,groupby,G1_1e9_1e2_0_1,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,7.058,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591083,groupby,G1_1e9_1e2_0_1,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.942,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591084,groupby,G1_1e9_1e2_0_1,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.095,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591112,groupby,G1_1e9_1e2_0_1,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,27.654,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591137,groupby,G1_1e9_1e2_0_1,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,24.775,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591452,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 by 
id1,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.376,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591452,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 by id1,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.314,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591454,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.92,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591456,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 by id1:id2,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.781,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591464,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,7.65,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591471,groupby,G1_1e9_1e2_5_0,1000000000,sum v1 mean v3 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,7.184,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591472,groupby,G1_1e9_1e2_5_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.733,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591473,groupby,G1_1e9_1e2_5_0,1000000000,mean v1:v3 by id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.743,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591480,groupby,G1_1e9_1e2_5_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,6.687,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591487,groupby,G1_1e9_1e2_5_0,1000000000,sum v1:v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,6.623,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591490,groupby,G1_1e9_1e2_5_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.498,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591492,groupby,G1_1e9_1e2_5_0,1000000000,median v3 sd v3 by id4 id5,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.407,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591499,groupby,G1_1e9_1e2_5_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,1,6.945,NA,TRUE,NA,NA,NA,TRUE 
+ip-172-31-38-198,1700587790,1700591507,groupby,G1_1e9_1e2_5_0,1000000000,max v1 - min v2 by id3,NA,NA,clickhouse,23.10.2.13,NA,NA,2,6.842,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591528,groupby,G1_1e9_1e2_5_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,21.657,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591550,groupby,G1_1e9_1e2_5_0,1000000000,largest two v3 by id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,21.131,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591553,groupby,G1_1e9_1e2_5_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.604,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591556,groupby,G1_1e9_1e2_5_0,1000000000,regression v1 v2 by id2 id4,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.558,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591589,groupby,G1_1e9_1e2_5_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,1,33.009,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700587790,1700591617,groupby,G1_1e9_1e2_5_0,1000000000,sum v3 count by id1:id6,NA,NA,clickhouse,23.10.2.13,NA,NA,2,27.094,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600142,join,J1_1e7_NA_0_0,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.143,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600142,join,J1_1e7_NA_0_0,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.1,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600143,join,J1_1e7_NA_0_0,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.144,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600143,join,J1_1e7_NA_0_0,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.143,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600143,join,J1_1e7_NA_0_0,10000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.142,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600144,join,J1_1e7_NA_0_0,10000000,medium outer on 
int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.128,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600144,join,J1_1e7_NA_0_0,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.099,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600144,join,J1_1e7_NA_0_0,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.097,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600146,join,J1_1e7_NA_0_0,10000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.639,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600148,join,J1_1e7_NA_0_0,10000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.505,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600309,join,J1_1e7_NA_5_0,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.147,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600310,join,J1_1e7_NA_5_0,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.116,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600310,join,J1_1e7_NA_5_0,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.169,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600310,join,J1_1e7_NA_5_0,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.161,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600311,join,J1_1e7_NA_5_0,10000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.169,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600311,join,J1_1e7_NA_5_0,10000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.178,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600311,join,J1_1e7_NA_5_0,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.129,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600311,join,J1_1e7_NA_5_0,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.122,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600313,join,J1_1e7_NA_5_0,10000000,big inner on 
int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.74,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600315,join,J1_1e7_NA_5_0,10000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.726,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600476,join,J1_1e7_NA_0_1,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.087,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600476,join,J1_1e7_NA_0_1,10000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.069,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600477,join,J1_1e7_NA_0_1,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.08,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600477,join,J1_1e7_NA_0_1,10000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.076,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600477,join,J1_1e7_NA_0_1,10000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.078,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600477,join,J1_1e7_NA_0_1,10000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.079,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600478,join,J1_1e7_NA_0_1,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.069,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600478,join,J1_1e7_NA_0_1,10000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.076,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600479,join,J1_1e7_NA_0_1,10000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.499,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600481,join,J1_1e7_NA_0_1,10000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.55,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600716,join,J1_1e8_NA_0_0,100000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.94,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600717,join,J1_1e8_NA_0_0,100000000,small inner on 
int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.716,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600718,join,J1_1e8_NA_0_0,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.302,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600720,join,J1_1e8_NA_0_0,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.251,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600721,join,J1_1e8_NA_0_0,100000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.307,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600723,join,J1_1e8_NA_0_0,100000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.28,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600724,join,J1_1e8_NA_0_0,100000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.868,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600725,join,J1_1e8_NA_0_0,100000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.861,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600741,join,J1_1e8_NA_0_0,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,16.369,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600758,join,J1_1e8_NA_0_0,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,16.531,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600991,join,J1_1e8_NA_5_0,100000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.489,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600993,join,J1_1e8_NA_5_0,100000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.294,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600995,join,J1_1e8_NA_5_0,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.451,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700600998,join,J1_1e8_NA_5_0,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.39,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601000,join,J1_1e8_NA_5_0,100000000,medium outer on 
int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,2.206,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601003,join,J1_1e8_NA_5_0,100000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,2.234,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601004,join,J1_1e8_NA_5_0,100000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,1.488,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601006,join,J1_1e8_NA_5_0,100000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,1.469,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601026,join,J1_1e8_NA_5_0,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,19.534,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601044,join,J1_1e8_NA_5_0,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,18.231,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601268,join,J1_1e8_NA_0_1,100000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.614,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601268,join,J1_1e8_NA_0_1,100000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.473,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601269,join,J1_1e8_NA_0_1,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.627,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601270,join,J1_1e8_NA_0_1,100000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.607,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601271,join,J1_1e8_NA_0_1,100000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.669,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601271,join,J1_1e8_NA_0_1,100000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.662,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601272,join,J1_1e8_NA_0_1,100000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,0.556,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601273,join,J1_1e8_NA_0_1,100000000,medium inner on 
factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,0.55,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601288,join,J1_1e8_NA_0_1,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,15.076,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700601304,join,J1_1e8_NA_0_1,100000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,15.702,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700602758,join,J1_1e9_NA_0_0,1000000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,19.672,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700602796,join,J1_1e9_NA_0_0,1000000000,small inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,37.174,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700602851,join,J1_1e9_NA_0_0,1000000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,54.789,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700602905,join,J1_1e9_NA_0_0,1000000000,medium inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,53.46,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700602964,join,J1_1e9_NA_0_0,1000000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,57.758,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700603023,join,J1_1e9_NA_0_0,1000000000,medium outer on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,58.079,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700603067,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,1,43.766,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700603113,join,J1_1e9_NA_0_0,1000000000,medium inner on factor,NA,NA,clickhouse,23.10.2.13,NA,NA,2,45.225,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700603356,join,J1_1e9_NA_0_0,1000000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,1,235.105,NA,TRUE,NA,NA,NA,TRUE +ip-172-31-38-198,1700600133,1700603631,join,J1_1e9_NA_0_0,1000000000,big inner on int,NA,NA,clickhouse,23.10.2.13,NA,NA,2,266.59,NA,TRUE,NA,NA,NA,TRUE From 3aeb207aaac86e8294ab8645348a421ec772cbfa Mon Sep 17 00:00:00 2001 From: 
Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 22 Nov 2023 15:15:41 +0000 Subject: [PATCH 3/9] address comment --- clickhouse/exec.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clickhouse/exec.sh b/clickhouse/exec.sh index cc411dd8..1ef76699 100755 --- a/clickhouse/exec.sh +++ b/clickhouse/exec.sh @@ -38,13 +38,13 @@ if [ $1 == 'groupby' ]; then clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" if [ $HAS_NULL -eq 1 ]; then if [ $IS_SORTED -eq 1 ]; then - clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(Nullable(String)), id2 LowCardinality(Nullable(String)), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6,v1,v2,v3);" + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(Nullable(String)), id2 LowCardinality(Nullable(String)), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6);" else clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(Nullable(String)), id2 LowCardinality(Nullable(String)), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" fi else if [ $IS_SORTED -eq 1 ]; then - clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(String), id2 LowCardinality(String), id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6,v1,v2,v3);" + clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(String), id2 LowCardinality(String), id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, 
v3 Float64) ENGINE = MergeTree() ORDER BY (id1,id2,id3,id4,id5,id6);" else clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 LowCardinality(String), id2 LowCardinality(String), id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY tuple();" fi From d5e71b63513848c1520c3ff126b91c29e6b57a36 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:14:56 +0100 Subject: [PATCH 4/9] add clickhouse to regression script --- .github/workflows/regression.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 53a7684e..f4ecf306 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion] + solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion, clickhouse] name: Regression Tests solo solutions runs-on: ubuntu-20.04 env: From b9e399686ca105cdadd61f840681645cc58f0b8f Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:24:01 +0100 Subject: [PATCH 5/9] actually install clickhouse for regression test --- _utils/install_all_solutions.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/_utils/install_all_solutions.py b/_utils/install_all_solutions.py index 58bd847b..f16aff95 100755 --- a/_utils/install_all_solutions.py +++ b/_utils/install_all_solutions.py @@ -26,8 +26,6 @@ def install_all_solutions(): with open(SOLUTIONS_FILENAME, newline="") as solutions_file: solutions = csv.DictReader(solutions_file, delimiter=',') for row in solutions: - if row['solution'] == "clickhouse": - continue elif row['solution'] == "data.table": install_solutions.add("datatable") else: From 8d5e27459ba4db30c22f7d64809d2f2822e53ba0 Mon 
Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:25:09 +0100 Subject: [PATCH 6/9] remove clickhouse from skipped solutions --- _utils/prep_solutions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_utils/prep_solutions.py b/_utils/prep_solutions.py index 98f4ddfc..41637d62 100755 --- a/_utils/prep_solutions.py +++ b/_utils/prep_solutions.py @@ -5,7 +5,7 @@ SOLUTIONS_FILENAME = "_control/solutions.csv" RUN_CONF_FILENAME = "run.conf" -SKIPPED_SOLUTIONS = ["clickhouse"] +SKIPPED_SOLUTIONS = [] def print_usage(): From 23c7f8dbd0800dc8d312aa4a1305a9878ed26aa0 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:25:57 +0100 Subject: [PATCH 7/9] add tmate solution --- .github/workflows/regression.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index f4ecf306..1090e86b 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -54,6 +54,9 @@ jobs: shell: bash run: sudo swapoff -a + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + - name: Run mini GroupBy benchmark shell: bash run: | From 91831f7fb5c7dad7c7174a6268b0476b491bf15f Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:31:13 +0100 Subject: [PATCH 8/9] fix syntax mistake --- _utils/install_all_solutions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_utils/install_all_solutions.py b/_utils/install_all_solutions.py index f16aff95..c540e4ff 100755 --- a/_utils/install_all_solutions.py +++ b/_utils/install_all_solutions.py @@ -26,7 +26,7 @@ def install_all_solutions(): with open(SOLUTIONS_FILENAME, newline="") as solutions_file: solutions = csv.DictReader(solutions_file, delimiter=',') for row in solutions: - elif row['solution'] == "data.table": + if row['solution'] == "data.table": install_solutions.add("datatable") else: install_solutions.add(row['solution']) From e564e5102d167036f19654de3da07391458a6e8c Mon Sep 
17 00:00:00 2001 From: Tmonster Date: Thu, 30 Nov 2023 16:45:30 +0100 Subject: [PATCH 9/9] remove clickhouse skip again --- _utils/install_all_solutions.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/_utils/install_all_solutions.py b/_utils/install_all_solutions.py index c540e4ff..ec2dfd40 100755 --- a/_utils/install_all_solutions.py +++ b/_utils/install_all_solutions.py @@ -42,9 +42,7 @@ def install_all_solutions(): if solution.strip() == "all": install_all_solutions() else: - if solution == "clickhouse": - continue - elif solution == "data.table": + if solution == "data.table": install_solution("datatable") else: install_solution(solution)