diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 53a7684e..88265087 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, arrow, duckdb, duckdb-latest, datafusion] + solution: [data.table, collapse, dplyr, pandas, pydatatable, spark, juliadf, juliads, polars, R-arrow, duckdb, duckdb-latest, datafusion, dask, clickhouse] name: Regression Tests solo solutions runs-on: ubuntu-20.04 env: @@ -54,6 +54,18 @@ jobs: shell: bash run: sudo swapoff -a + + # needed because clickhouse for some reason produces an error the first + # time a benchmark is run. The next benchmark run will work and overwrite the + # old benchmark files. + - name: Run mini GroupBy benchmark if clickhouse + shell: bash + if: ${{ matrix.solution == 'clickhouse' || matrix.solution == 'all' }} + run: | + python3 _utils/prep_solutions.py --task=groupby --solution=clickhouse + source path.env + TEST_RUN=true ./run.sh + - name: Run mini GroupBy benchmark shell: bash run: | diff --git a/arrow/groupby-arrow.R b/R-arrow/groupby-R-arrow.R similarity index 98% rename from arrow/groupby-arrow.R rename to R-arrow/groupby-R-arrow.R index 950bcff0..100d3dec 100755 --- a/arrow/groupby-arrow.R +++ b/R-arrow/groupby-R-arrow.R @@ -7,13 +7,13 @@ source("./_helpers/helpers.R") stopifnot(requireNamespace("bit64", quietly=TRUE)) # used in chk to sum numeric columns -.libPaths("./arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well +.libPaths("./R-arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well suppressPackageStartupMessages({ - library("arrow", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE) - library("dplyr", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE) + library("arrow", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE) + library("dplyr", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE) }) ver = 
packageVersion("arrow") git = "" task = "groupby" -solution = "arrow" +solution = "R-arrow" fun = "group_by" cache = TRUE on_disk = FALSE diff --git a/arrow/join-arrow.R b/R-arrow/join-R-arrow.R similarity index 97% rename from arrow/join-arrow.R rename to R-arrow/join-R-arrow.R index 69df274d..559d05c9 100755 --- a/arrow/join-arrow.R +++ b/R-arrow/join-R-arrow.R @@ -6,13 +6,13 @@ source("./_helpers/helpers.R") -.libPaths("./arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well +.libPaths("./R-arrow/r-arrow") # tidyverse/dplyr#4641 ## leave it like here in case if this affects arrow pkg as well suppressPackageStartupMessages({ - library("arrow", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE) - library("dplyr", lib.loc="./arrow/r-arrow", warn.conflicts=FALSE) + library("arrow", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE) + library("dplyr", lib.loc="./R-arrow/r-arrow", warn.conflicts=FALSE) }) ver = packageVersion("arrow") git = "" task = "join" -solution = "arrow" +solution = "R-arrow" cache = TRUE on_disk = FALSE diff --git a/R-arrow/setup-R-arrow.sh b/R-arrow/setup-R-arrow.sh new file mode 100755 index 00000000..e5ff947a --- /dev/null +++ b/R-arrow/setup-R-arrow.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e + +# install stable arrow +mkdir -p ./R-arrow/r-arrow +Rscript -e 'install.packages(c("arrow","dplyr"), lib="./R-arrow/r-arrow")' diff --git a/arrow/upg-arrow.sh b/R-arrow/upg-R-arrow.sh similarity index 55% rename from arrow/upg-arrow.sh rename to R-arrow/upg-R-arrow.sh index d2fb9de5..4d677d3e 100755 --- a/arrow/upg-arrow.sh +++ b/R-arrow/upg-R-arrow.sh @@ -3,4 +3,4 @@ set -e # upgrade all packages in arrow library only if new arrow is out echo 'upgrading arrow...' 
-Rscript -e 'ap=available.packages(); if (ap["arrow","Version"]!=packageVersion("arrow", lib.loc="./arrow/r-arrow")) update.packages(lib.loc="./arrow/r-arrow", ask=FALSE, checkBuilt=TRUE, quiet=TRUE)' +Rscript -e 'ap=available.packages(); if (ap["arrow","Version"]!=packageVersion("arrow", lib.loc="./R-arrow/r-arrow")) update.packages(lib.loc="./R-arrow/r-arrow", ask=FALSE, checkBuilt=TRUE, quiet=TRUE)' diff --git a/R-arrow/ver-R-arrow.sh b/R-arrow/ver-R-arrow.sh new file mode 100755 index 00000000..8c24e043 --- /dev/null +++ b/R-arrow/ver-R-arrow.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +Rscript -e 'v=read.dcf(system.file(package="arrow", lib.loc="./R-arrow/r-arrow", "DESCRIPTION"), fields=c("Version","RemoteSha")); colnames(v)[colnames(v)=="RemoteSha"]="Revision"; cnafill=function(x) {x=c(x); x[is.na(x)]=""; x}; fw=function(f, v) writeLines(v, file.path("R-arrow", f)); invisible(mapply(fw, toupper(colnames(v)), cnafill(v)))' diff --git a/_benchplot/benchplot-dict.R b/_benchplot/benchplot-dict.R index 6ac2df8a..63a80206 100644 --- a/_benchplot/benchplot-dict.R +++ b/_benchplot/benchplot-dict.R @@ -42,7 +42,7 @@ solution.dict = {list( "juliads" = list(name=c(short="IMD.jl", long="InMemoryDatasets.jl"), color=c(strong="#b80000", light="#ff1f1f")), "clickhouse" = list(name=c(short="clickhouse", long="ClickHouse"), color=c(strong="hotpink4", light="hotpink1")), "polars" = list(name=c(short="polars", long="Polars"), color=c(strong="deepskyblue4", light="deepskyblue3")), - "arrow" = list(name=c(short="arrow", long="Arrow"), color=c(strong="aquamarine3", light="aquamarine1")), + "R-arrow" = list(name=c(short="R-arrow", long="R-arrow"), color=c(strong="aquamarine3", light="aquamarine1")), "duckdb" = list(name=c(short="duckdb", long="DuckDB"), color=c(strong="#ddcd07", light="#fff100")), "duckdb-latest" = list(name=c(short="duckdb-latest", long="duckdb-latest"), color=c(strong="#ddcd07", light="#fff100")), "datafusion" = list(name=c(short="datafusion", long="Datafusion"), 
color=c(strong="deepskyblue4", light="deepskyblue3")) @@ -199,7 +199,7 @@ groupby.syntax.dict = {list( "regression v1 v2 by id2 id4" = "DF.groupby(['id2','id4']).agg((pl.pearson_corr('v1','v2')**2).alias('r2')).collect()", "sum v3 count by id1:id6" = "DF.groupby(['id1','id2','id3','id4','id5','id6']).agg([pl.sum('v3').alias('v3'), pl.count('v1').alias('count')]).collect()" )}, - "arrow" = {c( + "R-arrow" = {c( "sum v1 by id1" = "AT %>% group_by(id1) %>% summarise(v1=sum(v1, na.rm=TRUE))", "sum v1 by id1:id2" = "AT %>% group_by(id1, id2) %>% summarise(v1=sum(v1, na.rm=TRUE))", "sum v1 mean v3 by id3" = "AT %>% group_by(id3) %>% summarise(v1=sum(v1, na.rm=TRUE), v3=mean(v3, na.rm=TRUE))", @@ -260,7 +260,7 @@ groupby.syntax.dict = {list( "juliads" = list(), "clickhouse" = list(), "polars" = list(), - "arrow" = list("Expression row_number() <= 2L not supported in Arrow; pulling data into R" = "max v1 - min v2 by id3", "Expression cor(v1, v2, ... is not supported in arrow; pulling data into R" = "regression v1 v2 by id2 id4"), + "R-arrow" = list("Expression row_number() <= 2L not supported in R-arrow; pulling data into R" = "max v1 - min v2 by id3", "Expression cor(v1, v2, ... 
is not supported in R-arrow; pulling data into R" = "regression v1 v2 by id2 id4"), "duckdb" = list(), "duckdb-latest" = list(), "datafusion" = list() @@ -309,7 +309,7 @@ groupby.data.exceptions = {list( "polars" = {list( # "out of memory" = c("G1_1e9_1e2_0_0","G1_1e9_1e1_0_0","G1_1e9_2e0_0_0","G1_1e9_1e2_0_1","G1_1e9_1e2_5_0") # q10 )}, - "arrow" = {list( + "R-arrow" = {list( # "timeout" = c(), # q10 "internal error" = c("G1_1e8_2e0_0_0", "G1_1e8_1e2_0_1", "G1_1e8_1e2_5_0", "G1_1e9_1e2_0_0","G1_1e9_1e2_0_1","G1_1e9_1e2_5_0","G1_1e9_1e1_0_0", # inherits from dplyr "G1_1e9_2e0_0_0"), # #190 @@ -413,7 +413,7 @@ join.syntax.dict = {list( "medium inner on factor" = "DF.merge(medium, on='id5')", "big inner on int" = "DF.merge(big, on='id3')" )}, - "arrow" = {c( + "R-arrow" = {c( "small inner on int" = "inner_join(DF, small, by='id1')", "medium inner on int" = "inner_join(DF, medium, by='id2')", "medium outer on int" = "left_join(DF, medium, by='id2')", @@ -454,7 +454,7 @@ join.query.exceptions = {list( "juliads" = list(), "clickhouse" = list(), "polars" = list(), - "arrow" = list(), + "R-arrow" = list(), "duckdb" = list(), "duckdb-latest" = list(), "datafusion" = list() @@ -496,7 +496,7 @@ join.data.exceptions = {list( "polars" = {list( "out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") )}, - "arrow" = {list( + "R-arrow" = {list( "out of memory" = c("J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1", "J1_1e8_NA_0_0", "J1_1e8_NA_5_0", "J1_1e8_NA_0_1" )#, # "not yet implemented: #189" = c("J1_1e7_NA_0_0","J1_1e7_NA_5_0","J1_1e7_NA_0_1","J1_1e8_NA_0_0","J1_1e8_NA_5_0","J1_1e8_NA_0_1","J1_1e9_NA_0_0","J1_1e9_NA_5_0","J1_1e9_NA_0_1") )}, diff --git a/_control/solutions.csv b/_control/solutions.csv index c96f07cf..89009a06 100644 --- a/_control/solutions.csv +++ b/_control/solutions.csv @@ -25,8 +25,8 @@ clickhouse,groupby clickhouse,join polars,groupby polars,join -arrow,groupby -arrow,join +R-arrow,groupby +R-arrow,join duckdb,groupby duckdb,join 
duckdb-latest,groupby diff --git a/_launcher/launcher.R b/_launcher/launcher.R index 167d9dee..c2d59bdb 100644 --- a/_launcher/launcher.R +++ b/_launcher/launcher.R @@ -14,7 +14,7 @@ readret = function(x) { file.ext = function(x) { ans = switch( x, - "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R", + "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "R-arrow"=, "duckdb"="R", "duckdb-latest"="R", "pandas"=, "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py", "clickhouse"="sql", "juliadf"="jl", "juliads"="jl", diff --git a/_launcher/solution.R b/_launcher/solution.R index f66b4311..35d3a6a2 100755 --- a/_launcher/solution.R +++ b/_launcher/solution.R @@ -110,7 +110,7 @@ if ("quiet" %in% names(args)) { file.ext = function(x) { ans = switch( x, - "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "arrow"=, "duckdb"="R", "duckdb-latest"="R", + "collapse"=, "data.table"=, "dplyr"=, "h2o"=, "R-arrow"=, "duckdb"="R", "duckdb-latest"="R", "pandas"="py", "spark"=, "pydatatable"=, "modin"=, "dask"=, "datafusion"=, "polars"="py", "clickhouse"="sql", "juliadf"="jl", "juliads"="jl" diff --git a/_report/report.R b/_report/report.R index 35082113..35ef889f 100644 --- a/_report/report.R +++ b/_report/report.R @@ -6,7 +6,7 @@ get_report_status_file = function(path=getwd()) { file.path(path, "report-done") } get_report_solutions = function() { - c("collapse", "data.table", "dplyr", "pandas", "pydatatable", "spark", "dask", "juliadf", "juliads", "clickhouse", "cudf", "polars","arrow","duckdb", "duckdb-latest", "datafusion") + c("collapse", "data.table", "dplyr", "pandas", "pydatatable", "spark", "dask", "juliadf", "juliads", "clickhouse", "cudf", "polars", "duckdb", "duckdb-latest", "datafusion", "arrow", "R-arrow") } get_data_levels = function() { ## groupby @@ -69,6 +69,9 @@ clean_time = function(d) { if (nrow(d[!nzchar(version) | is.na(version)])) stop("timings data contains NA or '' as version field, that should not happen") 
old_advanced_groupby_questions = c("median v3 sd v3 by id2 id4","max v1 - min v2 by id2 id4","largest two v3 by id2 id4","regression v1 v2 by id2 id4","sum v3 count by id1:id6") + + # replace arrow with R-arrow (see https://github.com/duckdblabs/db-benchmark/pull/66) + d$solution[d$solution == "arrow"] <- "R-arrow" d[!nzchar(git), git := NA_character_ ][,"on_disk" := as.logical(on_disk) ][task=="groupby" & solution%in%c("pandas","dask","spark") & batch<1558106628, "out_cols" := NA_integer_ @@ -243,9 +246,13 @@ transform = function(ld) { # all ---- time_logs = function(path=getwd()) { - ct = clean_time(load_time(path=getwd())) + lt <- load_time(path=path) + + ct = clean_time(lt) d = model_time(ct) - l = model_logs(clean_logs(load_logs(path=path))) + ll <- load_logs(path=path) + ll$solution[ll$solution == "arrow"] <- "R-arrow" + l = model_logs(clean_logs(ll)) q = model_questions(clean_questions(load_questions(path=path))) lq = merge_logs_questions(l, q) diff --git a/_utils/install_all_solutions.py b/_utils/install_all_solutions.py index 58bd847b..c000e521 100755 --- a/_utils/install_all_solutions.py +++ b/_utils/install_all_solutions.py @@ -26,9 +26,7 @@ def install_all_solutions(): with open(SOLUTIONS_FILENAME, newline="") as solutions_file: solutions = csv.DictReader(solutions_file, delimiter=',') for row in solutions: - if row['solution'] == "clickhouse": - continue - elif row['solution'] == "data.table": + if row['solution'] == "data.table": install_solutions.add("datatable") else: install_solutions.add(row['solution']) @@ -44,10 +42,11 @@ def install_all_solutions(): if solution.strip() == "all": install_all_solutions() else: - if solution == "clickhouse": - continue - elif solution == "data.table": + if solution == "data.table": install_solution("datatable") + elif solution == "clickhouse": + install_solution("clickhouse") + install_solution("polars") else: install_solution(solution) diff --git a/_utils/prep_solutions.py b/_utils/prep_solutions.py index 
98f4ddfc..ed3e4a24 100755 --- a/_utils/prep_solutions.py +++ b/_utils/prep_solutions.py @@ -5,7 +5,7 @@ SOLUTIONS_FILENAME = "_control/solutions.csv" RUN_CONF_FILENAME = "run.conf" -SKIPPED_SOLUTIONS = ["clickhouse"] +SKIPPED_SOLUTIONS = [] def print_usage(): @@ -33,6 +33,8 @@ def main(): solution = parse_solution() if solution == "all": solution = get_solutions(task) + if solution == "clickhouse": + solution = "clickhouse polars" update_run_conf_solutions(solution, task) def update_run_conf_solutions(solution_name_list, task): diff --git a/_utils/validate_no_errors.sh b/_utils/validate_no_errors.sh index cd855369..9c8fa4ef 100755 --- a/_utils/validate_no_errors.sh +++ b/_utils/validate_no_errors.sh @@ -1,10 +1,10 @@ -if [ $(grep -i "error" out/run_*.err | wc -l) = 0 ] +if [ $(grep -iE 'error|exception' out/run_*.err | wc -l) = 0 ] then # no true errors found, print last line of each output script echo "No Errors found in run_*.err logs" else echo "The following errors have been found. Failing check" - grep -i "error" out/*.err + grep -iE "error|exception" out/*.err exit 1 fi diff --git a/arrow/setup-arrow.sh b/arrow/setup-arrow.sh deleted file mode 100755 index dcad2ad3..00000000 --- a/arrow/setup-arrow.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e - -# install stable arrow -mkdir -p ./arrow/r-arrow -Rscript -e 'install.packages(c("arrow","dplyr"), lib="./arrow/r-arrow")' diff --git a/arrow/ver-arrow.sh b/arrow/ver-arrow.sh deleted file mode 100755 index 44bb8ede..00000000 --- a/arrow/ver-arrow.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -set -e - -Rscript -e 'v=read.dcf(system.file(package="arrow", lib.loc="./arrow/r-arrow", "DESCRIPTION"), fields=c("Version","RemoteSha")); colnames(v)[colnames(v)=="RemoteSha"]="Revision"; cnafill=function(x) {x=c(x); x[is.na(x)]=""; x}; fw=function(f, v) writeLines(v, file.path("arrow", f)); invisible(mapply(fw, toupper(colnames(v)), cnafill(v)))' diff --git a/clickhouse/exec.sh b/clickhouse/exec.sh index 
1b697c12..4d7c799c 100755 --- a/clickhouse/exec.sh +++ b/clickhouse/exec.sh @@ -34,29 +34,39 @@ if [ $1 == 'groupby' ]; then clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(String), id2 Nullable(String), id3 Nullable(String), id4 Nullable(Int32), id5 Nullable(Int32), id6 Nullable(Int32), v1 Nullable(Int32), v2 Nullable(Int32), v3 Nullable(Float64)') FORMAT CSV" # confirm all data loaded yandex/ClickHouse#4463 - echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + if [ ! 
$TEST_RUN ]; then + echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + fi elif [ $1 == 'join' ]; then # lhs clickhouse-client --query "DROP TABLE IF EXISTS $SRC_DATANAME" clickhouse-client --query "CREATE TABLE $SRC_DATANAME (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" tail -n+2 data/$SRC_DATANAME.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $SRC_DATANAME SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v1 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + if [ ! 
$TEST_RUN ]; then + echo -e "clickhouse-client --query 'SELECT count(*) FROM $SRC_DATANAME'\n$(echo $SRC_DATANAME | cut -d'_' -f2)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + fi RHS=$(join_to_tbls $SRC_DATANAME) RHS1=$(echo $RHS | cut -d' ' -f1) clickhouse-client --query "DROP TABLE IF EXISTS $RHS1" clickhouse-client --query "CREATE TABLE $RHS1 (id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" tail -n+2 data/$RHS1.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS1 SELECT * FROM input('id1 Nullable(Int32), id4 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS1'\n$(echo $RHS1 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + if [ ! 
$TEST_RUN ]; then + echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS1'\n$(echo $RHS1 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + fi RHS2=$(echo $RHS | cut -d' ' -f2) clickhouse-client --query "DROP TABLE IF EXISTS $RHS2" clickhouse-client --query "CREATE TABLE $RHS2 (id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" tail -n+2 data/$RHS2.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS2 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS2'\n$(echo $RHS2 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + if [ ! 
$TEST_RUN ]; then + echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS2'\n$(echo $RHS2 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + fi RHS3=$(echo $RHS | cut -d' ' -f3) clickhouse-client --query "DROP TABLE IF EXISTS $RHS3" clickhouse-client --query "CREATE TABLE $RHS3 (id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)) ENGINE = MergeTree() ORDER BY tuple();" tail -n+2 data/$RHS3.csv | clickhouse-client --max_memory_usage $CH_MEM --max_insert_threads 1 --query "INSERT INTO $RHS3 SELECT * FROM input('id1 Nullable(Int32), id2 Nullable(Int32), id3 Nullable(Int32), id4 Nullable(String), id5 Nullable(String), id6 Nullable(String), v2 Nullable(Float64)') FORMAT CSV" - echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS3'\n$(echo $RHS3 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + if [ ! 
$TEST_RUN ]; then + echo -e "clickhouse-client --query 'SELECT count(*) FROM $RHS3'\n$(echo $RHS3 | cut -d'_' -f3)" | Rscript -e 'stdin=readLines(file("stdin")); if ((loaded<-as.numeric(system(stdin[1L], intern=TRUE)))!=as.numeric(stdin[2L])) stop("incomplete data load, expected: ", stdin[2L],", loaded: ", loaded)' + fi else echo "clickhouse task $1 not implemented" >&2 && exit 1 fi diff --git a/clickhouse/setup-clickhouse.sh b/clickhouse/setup-clickhouse.sh index 4c6e87cc..8d442835 100755 --- a/clickhouse/setup-clickhouse.sh +++ b/clickhouse/setup-clickhouse.sh @@ -14,7 +14,7 @@ sudo rm /var/log/clickhouse-server/clickhouse-server.err.log /var/log/clickhouse sudo service clickhouse-server start # stop server -#sudo service clickhouse-server stop +sudo service clickhouse-server stop # let file table function access csv -- NO LONGER NECESSARY # grep '/var/lib/clickhouse/user_files/' /etc/clickhouse-server/config.xml diff --git a/dask/VERSION b/dask/VERSION index ea516b56..3bae6081 100644 --- a/dask/VERSION +++ b/dask/VERSION @@ -1 +1 @@ -2023.10.1 \ No newline at end of file +2023.10.0 \ No newline at end of file diff --git a/dask/groupby-dask2.py b/dask/groupby-dask2.py index 41c0f231..52cb0e34 100755 --- a/dask/groupby-dask2.py +++ b/dask/groupby-dask2.py @@ -23,8 +23,6 @@ from dask import distributed # we use process-pool instead of thread-pool due to GIL cost client = distributed.Client(processes=True, silence_logs=logging.ERROR) -# since we are running on local cluster of processes, we would prefer to keep the communication between workers to relative minimum, thus it's better to trade some tasks granularity for better processing locality -dk.config.set({"optimization.fuse.ave-width": 20}) data_name = os.environ['SRC_DATANAME'] on_disk = False #data_name.split("_")[1] == "1e9" # on-disk data storage #126 @@ -38,9 +36,8 @@ exit(0) # not yet implemented #171, currently groupby's dropna=False argument is ignored print("using disk memory-mapped data storage" if 
on_disk else "using in-memory data storage", flush=True) -#x = dd.read_parquet(src_grp, engine="fastparquet") if on_disk else -x = dd.read_csv(src_grp, dtype={"id1":"category","id2":"category","id3":"category","id4":"Int32","id5":"Int32","id6":"Int32","v1":"Int32","v2":"Int32","v3":"float64"}) - +#x = dd.read_parquet(src_grp, engine="pyarrow") if on_disk else +x = dd.read_csv(src_grp, engine="pyarrow") x = x.persist() in_rows = len(x) @@ -189,147 +186,166 @@ print(ans.tail(3), flush=True) del ans -#question = "median v3 sd v3 by id4 id5" # q6 # median function not yet implemented: https://github.com/dask/dask/issues/4362 -#gc.collect() -#t_start = timeit.default_timer() -#ans = x.groupby(['id4','id5'], dropna=False, observed=True).agg({'v3': ['median','std']}).compute() -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans['v3']['median'].sum(), ans['v3']['std'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#del ans -#gc.collect() -#t_start = timeit.default_timer() -#ans = x.groupby(['id4','id5'], dropna=False, observed=True).agg({'v3': ['median','std']}).compute() -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans['v3']['median'].sum(), ans['v3']['std'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) 
-#print(ans.head(3), flush=True) -#print(ans.tail(3), flush=True) -#del ans +question = "median v3 sd v3 by id4 id5" # q6 +gc.collect() +t_start = timeit.default_timer() +ans = x.groupby(['id4','id5'], dropna=False, observed=True).agg({'v3': ['median','std']}, shuffle='p2p').compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['v3']['median'].sum(), ans['v3']['std'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() +t_start = timeit.default_timer() +ans = x.groupby(['id4','id5'], dropna=False, observed=True).agg({'v3': ['median','std']}, shuffle='p2p').compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['v3']['median'].sum(), ans['v3']['std'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +print(ans.head(3), flush=True) +print(ans.tail(3), flush=True) +del ans -# question = "max v1 - min v2 by id3" # q7 -# gc.collect() -# t_start = timeit.default_timer() -# ans = x.groupby('id3', dropna=False, observed=True).agg({'v1':'max', 'v2':'min'}).assign(range_v1_v2=lambda x: x['v1']-x['v2'])[['range_v1_v2']].compute() -# ans.reset_index(inplace=True) -# print(ans.shape, flush=True) -# t = timeit.default_timer() - t_start -# m = memory_usage() -# t_start = timeit.default_timer() -# chk = 
[ans['range_v1_v2'].sum()] -# chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -# del ans -# gc.collect() -# t_start = timeit.default_timer() -# ans = x.groupby('id3', dropna=False, observed=True).agg({'v1':'max', 'v2':'min'}).assign(range_v1_v2=lambda x: x['v1']-x['v2'])[['range_v1_v2']].compute() -# ans.reset_index(inplace=True) -# print(ans.shape, flush=True) -# t = timeit.default_timer() - t_start -# m = memory_usage() -# t_start = timeit.default_timer() -# chk = [ans['range_v1_v2'].sum()] -# chkt = timeit.default_timer() - t_start -# write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -# print(ans.head(3), flush=True) -# print(ans.tail(3), flush=True) -# del ans +question = "max v1 - min v2 by id3" # q7 +gc.collect() +t_start = timeit.default_timer() +ans = x.groupby('id3', dropna=False, observed=True).agg({'v1':'max', 'v2':'min'}).assign(range_v1_v2=lambda x: x['v1']-x['v2'])[['range_v1_v2']].compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['range_v1_v2'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() +t_start = timeit.default_timer() +ans = x.groupby('id3', dropna=False, 
observed=True).agg({'v1':'max', 'v2':'min'}).assign(range_v1_v2=lambda x: x['v1']-x['v2'])[['range_v1_v2']].compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['range_v1_v2'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +print(ans.head(3), flush=True) +print(ans.tail(3), flush=True) +del ans -#question = "largest two v3 by id6" # q8 -#gc.collect() -#t_start = timeit.default_timer() -#ans = x[~x['v3'].isna()][['id6','v3']].groupby('id6', dropna=False, observed=True).apply(lambda x: x.nlargest(2, columns='v3'), meta={'id6':'Int64', 'v3':'float64'})[['v3']].compute() -#ans.reset_index(level='id6', inplace=True) -#ans.reset_index(drop=True, inplace=True) # drop because nlargest creates some extra new index field -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans['v3'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#del ans -#gc.collect() -#t_start = timeit.default_timer() -#ans = x[~x['v3'].isna()][['id6','v3']].groupby('id6', dropna=False, observed=True).apply(lambda x: x.nlargest(2, columns='v3'), meta={'id6':'Int64', 'v3':'float64'})[['v3']].compute() -#ans.reset_index(level='id6', inplace=True) -#ans.reset_index(drop=True, inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() 
-#t_start = timeit.default_timer() -#chk = [ans['v3'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#print(ans.head(3), flush=True) -#print(ans.tail(3), flush=True) -#del ans +question = "largest two v3 by id6" # q8 +gc.collect() +t_start = timeit.default_timer() +ans = x[~x['v3'].isna()][['id6','v3']].groupby('id6', dropna=False, observed=True).apply(lambda x: x.nlargest(2, columns='v3'), meta={'id6':'Int64', 'v3':'float64'})[['v3']].compute() +ans.reset_index(level='id6', inplace=True) +ans.reset_index(drop=True, inplace=True) # drop because nlargest creates some extra new index field +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['v3'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() +t_start = timeit.default_timer() +ans = x[~x['v3'].isna()][['id6','v3']].groupby('id6', dropna=False, observed=True).apply(lambda x: x.nlargest(2, columns='v3'), meta={'id6':'Int64', 'v3':'float64'})[['v3']].compute() +ans.reset_index(level='id6', inplace=True) +ans.reset_index(drop=True, inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['v3'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, 
fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +print(ans.head(3), flush=True) +print(ans.tail(3), flush=True) +del ans -#question = "regression v1 v2 by id2 id4" # q9 -#gc.collect() -#t_start = timeit.default_timer() -#ans = x[['id2','id4','v1','v2']].groupby(['id2','id4'], dropna=False, observed=True).apply(lambda x: pd.Series({'r2': x.corr()['v1']['v2']**2}), meta={'r2':'float64'}).compute() -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans['r2'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#del ans -#gc.collect() -#t_start = timeit.default_timer() -#ans = x[['id2','id4','v1','v2']].groupby(['id2','id4'], dropna=False, observed=True).apply(lambda x: pd.Series({'r2': x.corr()['v1']['v2']**2}), meta={'r2':'float64'}).compute() -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans['r2'].sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#print(ans.head(3), flush=True) -#print(ans.tail(3), flush=True) -#del ans +question = "regression v1 v2 by id2 id4" # q9 +gc.collect() +t_start = timeit.default_timer() +ans = x[['id2','id4','v1','v2']].groupby(['id2','id4'], dropna=False, observed=True)[["v1", "v2"]].apply(lambda x: pd.Series({'r2': 
x.corr()['v1']['v2']**2}), meta={'r2':'float64'}).compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['r2'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() +t_start = timeit.default_timer() +ans = x[['id2','id4','v1','v2']].groupby(['id2','id4'], dropna=False, observed=True)[["v1", "v2"]].apply(lambda x: pd.Series({'r2': x.corr()['v1']['v2']**2}), meta={'r2':'float64'}).compute() +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans['r2'].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +print(ans.head(3), flush=True) +print(ans.tail(3), flush=True) +del ans -#question = "sum v3 count by id1:id6" # q10 -#gc.collect() -#t_start = timeit.default_timer() -#ans = x.groupby(['id1','id2','id3','id4','id5','id6'], dropna=False, observed=True).agg({'v3':'sum', 'v1':'size'}).compute() # column name different than expected, ignore it because: ValueError: Metadata inference failed in `rename`: Original error is below: ValueError('Level values must be unique: [nan, nan] on level 0',) -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans.v3.sum(), ans.v1.sum()] -#chkt = 
timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#del ans -#gc.collect() -#t_start = timeit.default_timer() -#ans = x.groupby(['id1','id2','id3','id4','id5','id6'], dropna=False, observed=True).agg({'v3':'sum', 'v1':'size'}).compute() -#ans.reset_index(inplace=True) -#print(ans.shape, flush=True) -#t = timeit.default_timer() - t_start -#m = memory_usage() -#t_start = timeit.default_timer() -#chk = [ans.v3.sum(), ans.v1.sum()] -#chkt = timeit.default_timer() - t_start -#write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) -#print(ans.head(3), flush=True) -#print(ans.tail(3), flush=True) -#del ans +question = "sum v3 count by id1:id6" # q10 +print(question) +gc.collect() +t_start = timeit.default_timer() +ans = ( + x.groupby( + ['id1', 'id2', 'id3', 'id4', 'id5', 'id6'], + dropna=False, + observed=True, + ) + .agg({'v3': 'sum', 'v1': 'size'}, split_out=x.npartitions) + .rename(columns={"v1": "count"}) + .compute() +) +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans.v3.sum(), ans["count"].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=1, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +del ans +gc.collect() +t_start = timeit.default_timer() +ans = ( + x.groupby( + ['id1', 'id2', 
'id3', 'id4', 'id5', 'id6'], + dropna=False, + observed=True, + ) + .agg({'v3': 'sum', 'v1': 'size'}, split_out=x.npartitions) + .rename(columns={"v1": "count"}) + .compute() +) +ans.reset_index(inplace=True) +print(ans.shape, flush=True) +t = timeit.default_timer() - t_start +m = memory_usage() +t_start = timeit.default_timer() +chk = [ans.v3.sum(), ans["count"].sum()] +chkt = timeit.default_timer() - t_start +write_log(task=task, data=data_name, in_rows=in_rows, question=question, out_rows=ans.shape[0], out_cols=ans.shape[1], solution=solution, version=ver, git=git, fun=fun, run=2, time_sec=t, mem_gb=m, cache=cache, chk=make_chk(chk), chk_time_sec=chkt, on_disk=on_disk) +print(ans.head(3), flush=True) +print(ans.tail(3), flush=True) +del ans print("grouping finished, took %0.fs" % (timeit.default_timer()-task_init), flush=True) diff --git a/dask/join-dask.py b/dask/join-dask.py index 5ddcafa7..0c7197c7 100755 --- a/dask/join-dask.py +++ b/dask/join-dask.py @@ -43,10 +43,10 @@ # medium = dd.read_parquet(src_jn_y[1], engine="fastparquet") # big = dd.read_parquet(src_jn_y[2], engine="fastparquet") #else: -x = dd.read_csv(src_jn_x, dtype={'id1':'Int32','id2':'Int32','id3':'Int32','id4':'category','id5':'category','id6':'category','v1':'float64'}).persist() -small = dd.read_csv(src_jn_y[0], dtype={'id1':'Int32','id4':'category','v2':'float64'}).persist() -medium = dd.read_csv(src_jn_y[1], dtype={'id1':'Int32','id2':'Int32','id4':'category','id5':'category','v2':'float64'}).persist() -big = dd.read_csv(src_jn_y[2], dtype={'id1':'Int32','id2':'Int32','id3':'Int32','id4':'category','id5':'category','id6':'category','v2':'float64'}).persist() +x = dd.read_csv(src_jn_x, engine="pyarrow").persist() +small = dd.read_csv(src_jn_y[0], engine="pyarrow").persist() +medium = dd.read_csv(src_jn_y[1], engine="pyarrow").persist() +big = dd.read_csv(src_jn_y[2], engine="pyarrow").persist() in_rows = len(x) print(in_rows, flush=True) diff --git a/dask/setup-dask.sh 
b/dask/setup-dask.sh index f22e3148..c6fac985 100755 --- a/dask/setup-dask.sh +++ b/dask/setup-dask.sh @@ -1,13 +1,11 @@ #!/bin/bash set -e -virtualenv dask/py-dask --python=python3 +virtualenv dask/py-dask --python=python3.10 source dask/py-dask/bin/activate # install binaries python3 -m pip install "dask[complete]" -python3 -m pip install pandas psutil -python3 -m pip install distributed # check # python3 diff --git a/logs.csv b/logs.csv index 1775ca19..7ce38137 100644 --- a/logs.csv +++ b/logs.csv @@ -899,3 +899,48 @@ ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309325.24506, ip-172-31-31-147,1699289348,arrow,13.0.0.1,,join,J1_1e9_NA_0_0,1699309934.66574,finish,1,137 ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699437325.61783,start,, ip-172-31-31-147,1699437325,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,join,J1_1e9_NA_0_0,1699451725.72193,finish,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_0_0,1701270373.98008,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_0_0,1701270419.69202,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e1_0_0,1701270434.7073,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e1_0_0,1701270696.40654,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_2e0_0_0,1701270711.42164,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_2e0_0_0,1701271665.05859,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_0_1,1701271680.07383,start,, 
+ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_0_1,1701271724.90644,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_5_0,1701271739.92167,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e7_1e2_5_0,1701271741.80487,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_0_0,1701271756.82011,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_0_0,1701272323.32654,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e1_0_0,1701272338.34178,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e1_0_0,1701279538.71904,finish,5477,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_2e0_0_0,1701279553.73412,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_2e0_0_0,1701286773.73856,finish,1053,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_0_1,1701286788.7538,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_0_1,1701287351.43274,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_5_0,1701287366.44798,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e8_1e2_5_0,1701287368.35025,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_0_0,1701287383.3655,start,, 
+ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_0_0,1701298184.22329,finish,16301,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e1_0_0,1701298199.23849,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e1_0_0,1701308999.66472,finish,3388,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_2e0_0_0,1701309014.67983,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_2e0_0_0,1701319814.99198,finish,2416,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_0_1,1701319830.00705,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_0_1,1701330630.76343,finish,30812,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_5_0,1701330645.77865,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,groupby,G1_1e9_1e2_5_0,1701330648.18685,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_0_0,1701330663.20195,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_0_0,1701330684.53032,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_5_0,1701330699.54542,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_5_0,1701330700.17275,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_0_1,1701330715.18799,start,, 
+ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e7_NA_0_1,1701330715.81832,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_0_0,1701330730.83357,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_0_0,1701331031.18587,finish,0,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_5_0,1701331046.19362,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_5_0,1701331046.8236,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_0_1,1701331061.83881,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e8_NA_0_1,1701331062.46537,finish,1,0 +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e9_NA_0_0,1701331077.48058,start,, +ip-172-31-31-147,1701270373,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,join,J1_1e9_NA_0_0,1701332733.02768,finish,1,137 + diff --git a/run.conf b/run.conf index 14e0f435..c019b15f 100644 --- a/run.conf +++ b/run.conf @@ -1,7 +1,7 @@ # task, used in init-setup-iteration.R export RUN_TASKS="groupby join" # solution, used in init-setup-iteration.R -export RUN_SOLUTIONS="collapse data.table juliads juliadf dplyr pandas pydatatable spark dask clickhouse polars arrow duckdb duckdb-latest datafusion" +export RUN_SOLUTIONS="collapse data.table juliads juliadf dplyr pandas pydatatable spark dask clickhouse polars R-arrow duckdb duckdb-latest datafusion" # flag to upgrade tools, used in run.sh on init export DO_UPGRADE=false diff --git a/run.sh b/run.sh index 8afc679c..e834a09e 100755 --- a/run.sh +++ b/run.sh @@ -71,8 +71,8 @@ if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" =~ "h2o" ]]; then ./h2o/upg-h2o. 
if [[ "$RUN_SOLUTIONS" =~ "h2o" ]]; then ./h2o/ver-h2o.sh; fi; if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" =~ "polars" ]]; then ./polars/upg-polars.sh; fi; if [[ "$RUN_SOLUTIONS" =~ "polars" ]]; then ./polars/ver-polars.sh; fi; -if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" =~ "arrow" ]]; then ./arrow/upg-arrow.sh; fi; -if [[ "$RUN_SOLUTIONS" =~ "arrow" ]]; then ./arrow/ver-arrow.sh; fi; +if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" =~ "R-arrow" ]]; then ./R-arrow/upg-R-arrow.sh; fi; +if [[ "$RUN_SOLUTIONS" =~ "R-arrow" ]]; then ./R-arrow/ver-R-arrow.sh; fi; if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" == "duckdb" ]]; then ./duckdb/upg-duckdb.sh; fi; if [[ "$RUN_SOLUTIONS" == "duckdb" ]]; then ./duckdb/ver-duckdb.sh; fi; if [[ "$DO_UPGRADE" == true && "$RUN_SOLUTIONS" == "duckdb-latest" ]]; then ./duckdb-latest/setup-duckdb-latest.sh; fi; diff --git a/time.csv b/time.csv index ab100aa0..e98a5e29 100644 --- a/time.csv +++ b/time.csv @@ -6267,3 +6267,220 @@ ip-172-31-31-147,1699289348,1699301553,join,J1_1e9_NA_0_0,1000000000,small inner ip-172-31-31-147,1699289348,1699309384,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,1,9.688,NA,TRUE,44998904641;45286789554,1.74,NA,FALSE ip-172-31-31-147,1699289348,1699309393,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,arrow,13.0.0.1,,inner_join,2,6.281,NA,TRUE,44998904641;45286789554,1.965,NA,FALSE ip-172-31-31-147,1699437325,1699447786,join,J1_1e9_NA_0_0,1000000000,small inner on int,899999033,9,data.table,1.14.9,88039186915028ab3c93ccfd8e22c0d1c3534b1a,[.data.table,1,18.94,NA,TRUE,44998904641;45286789554,3.059,NA,FALSE +ip-172-31-31-147,1701270373,1701270376.9167728,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.084,0.144,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701270377.0322177,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by
id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.083,0.144,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701270377.2725596,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.2,0.145,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701270377.4609835,groupby,G1_1e7_1e2_0_0,10000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.148,0.145,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701270377.787234,groupby,G1_1e7_1e2_0_0,10000000,sum v1 mean v3 by id3,100000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.288,0.153,TRUE,29998789;4999719.622,0.0,,False +ip-172-31-31-147,1701270373,1701270378.1396706,groupby,G1_1e7_1e2_0_0,10000000,sum v1 mean v3 by id3,100000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.318,0.154,TRUE,29998789;4999719.622,0.0,,False +ip-172-31-31-147,1701270373,1701270378.2712116,groupby,G1_1e7_1e2_0_0,10000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.092,0.148,TRUE,299.988;799.894;4999.767,0.0,,False +ip-172-31-31-147,1701270373,1701270378.3957121,groupby,G1_1e7_1e2_0_0,10000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.091,0.148,TRUE,299.988;799.894;4999.767,0.0,,False +ip-172-31-31-147,1701270373,1701270378.6309402,groupby,G1_1e7_1e2_0_0,10000000,sum v1:v3 by id6,100000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.196,0.153,TRUE,29998789;79989360;499976651.408,0.001,,False +ip-172-31-31-147,1701270373,1701270378.935985,groupby,G1_1e7_1e2_0_0,10000000,sum v1:v3 by id6,100000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.271,0.152,TRUE,29998789;79989360;499976651.408,0.001,,False +ip-172-31-31-147,1701270373,1701270381.7563255,groupby,G1_1e7_1e2_0_0,10000000,median v3 sd v3 
by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.777,0.148,TRUE,499920.14;288648.108,0.001,,False +ip-172-31-31-147,1701270373,1701270384.5215893,groupby,G1_1e7_1e2_0_0,10000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.724,0.148,TRUE,499920.14;288648.108,0.001,,False +ip-172-31-31-147,1701270373,1701270384.834282,groupby,G1_1e7_1e2_0_0,10000000,max v1 - min v2 by id3,100000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.274,0.154,TRUE,399882,0.0,,False +ip-172-31-31-147,1701270373,1701270385.1283183,groupby,G1_1e7_1e2_0_0,10000000,max v1 - min v2 by id3,100000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.26,0.154,TRUE,399882,0.0,,False +ip-172-31-31-147,1701270373,1701270396.1866038,groupby,G1_1e7_1e2_0_0,10000000,largest two v3 by id6,200000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,11.021,0.166,TRUE,19700450.588,0.0,,False +ip-172-31-31-147,1701270373,1701270406.9602866,groupby,G1_1e7_1e2_0_0,10000000,largest two v3 by id6,200000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,10.737,0.172,TRUE,19700450.588,0.0,,False +ip-172-31-31-147,1701270373,1701270408.644578,groupby,G1_1e7_1e2_0_0,10000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.644,0.157,TRUE,9.839,0.0,,False +ip-172-31-31-147,1701270373,1701270410.1822402,groupby,G1_1e7_1e2_0_0,10000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.502,0.157,TRUE,9.839,0.0,,False +ip-172-31-31-147,1701270373,1701270414.7781541,groupby,G1_1e7_1e2_0_0,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.537,0.871,TRUE,499976651.408;10000000,0.019,,False +ip-172-31-31-147,1701270373,1701270419.2975392,groupby,G1_1e7_1e2_0_0,10000000,sum v3 count by 
id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.444,1.107,TRUE,499976651.408;10000000,0.019,,False +ip-172-31-31-147,1701270373,1701270437.7671146,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.075,0.146,TRUE,29998597,0.0,,False +ip-172-31-31-147,1701270373,1701270437.8766892,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.076,0.146,TRUE,29998597,0.0,,False +ip-172-31-31-147,1701270373,1701270438.0429707,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.129,0.146,TRUE,29998597,0.0,,False +ip-172-31-31-147,1701270373,1701270438.201231,groupby,G1_1e7_1e1_0_0,10000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.125,0.146,TRUE,29998597,0.0,,False +ip-172-31-31-147,1701270373,1701270439.9478836,groupby,G1_1e7_1e1_0_0,10000000,sum v1 mean v3 by id3,999951,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.704,0.19,TRUE,29998597;50000558.524,0.003,,False +ip-172-31-31-147,1701270373,1701270441.6072276,groupby,G1_1e7_1e1_0_0,10000000,sum v1 mean v3 by id3,999951,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.623,0.249,TRUE,29998597;50000558.524,0.002,,False +ip-172-31-31-147,1701270373,1701270441.739028,groupby,G1_1e7_1e1_0_0,10000000,mean v1:v3 by id4,10,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.091,0.25,TRUE,29.999;79.992;499.981,0.0,,False +ip-172-31-31-147,1701270373,1701270441.8600256,groupby,G1_1e7_1e1_0_0,10000000,mean v1:v3 by id4,10,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.087,0.25,TRUE,29.999;79.992;499.981,0.0,,False +ip-172-31-31-147,1701270373,1701270442.7412753,groupby,G1_1e7_1e1_0_0,10000000,sum v1:v3 by 
id6,999965,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.839,0.25,TRUE,29998597;79991898;499980747.01,0.003,,False +ip-172-31-31-147,1701270373,1701270443.6023772,groupby,G1_1e7_1e1_0_0,10000000,sum v1:v3 by id6,999965,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.825,0.25,TRUE,29998597;79991898;499980747.01,0.003,,False +ip-172-31-31-147,1701270373,1701270445.99507,groupby,G1_1e7_1e1_0_0,10000000,median v3 sd v3 by id4 id5,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.35,0.25,TRUE,4999.573;2887.162,0.001,,False +ip-172-31-31-147,1701270373,1701270448.3552601,groupby,G1_1e7_1e1_0_0,10000000,median v3 sd v3 by id4 id5,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.325,0.251,TRUE,4999.573;2887.162,0.001,,False +ip-172-31-31-147,1701270373,1701270449.8542595,groupby,G1_1e7_1e1_0_0,10000000,max v1 - min v2 by id3,999951,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.457,0.266,TRUE,2789316,0.001,,False +ip-172-31-31-147,1701270373,1701270451.3055725,groupby,G1_1e7_1e1_0_0,10000000,max v1 - min v2 by id3,999951,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.414,0.266,TRUE,2789316,0.001,,False +ip-172-31-31-147,1701270373,1701270553.2851121,groupby,G1_1e7_1e1_0_0,10000000,largest two v3 by id6,1999500,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,101.937,0.406,TRUE,170016562.642,0.003,,False +ip-172-31-31-147,1701270373,1701270654.7354357,groupby,G1_1e7_1e1_0_0,10000000,largest two v3 by id6,1999500,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,101.393,0.406,TRUE,170016562.642,0.003,,False +ip-172-31-31-147,1701270373,1701270656.277447,groupby,G1_1e7_1e1_0_0,10000000,regression v1 v2 by id2 id4,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.484,0.252,TRUE,0.001,0.0,,False 
+ip-172-31-31-147,1701270373,1701270657.756136,groupby,G1_1e7_1e1_0_0,10000000,regression v1 v2 by id2 id4,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.442,0.252,TRUE,0.001,0.0,,False +ip-172-31-31-147,1701270373,1701270676.3039303,groupby,G1_1e7_1e1_0_0,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,18.487,1.441,TRUE,499980747.01;10000000,0.018,,False +ip-172-31-31-147,1701270373,1701270695.8785276,groupby,G1_1e7_1e1_0_0,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,19.482,1.463,TRUE,499980747.01;10000000,0.018,,False +ip-172-31-31-147,1701270373,1701270714.5591867,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.09,0.146,TRUE,30000054,0.0,,False +ip-172-31-31-147,1701270373,1701270714.6752782,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.083,0.146,TRUE,30000054,0.0,,False +ip-172-31-31-147,1701270373,1701270714.8593626,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.145,0.146,TRUE,30000054,0.0,,False +ip-172-31-31-147,1701270373,1701270715.0414004,groupby,G1_1e7_2e0_0_0,10000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.148,0.146,TRUE,30000054,0.0,,False +ip-172-31-31-147,1701270373,1701270719.2378724,groupby,G1_1e7_2e0_0_0,10000000,sum v1 mean v3 by id3,4323566,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.143,0.275,TRUE,30000054;216107547.389,0.013,,False +ip-172-31-31-147,1701270373,1701270723.387519,groupby,G1_1e7_2e0_0_0,10000000,sum v1 mean v3 by id3,4323566,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.084,0.295,TRUE,30000054;216107547.389,0.012,,False 
+ip-172-31-31-147,1701270373,1701270723.5407832,groupby,G1_1e7_2e0_0_0,10000000,mean v1:v3 by id4,2,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.099,0.183,TRUE,6.0;15.997;99.987,0.0,,False +ip-172-31-31-147,1701270373,1701270723.6719239,groupby,G1_1e7_2e0_0_0,10000000,mean v1:v3 by id4,2,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.095,0.183,TRUE,6.0;15.997;99.987,0.0,,False +ip-172-31-31-147,1701270373,1701270725.8696494,groupby,G1_1e7_2e0_0_0,10000000,sum v1:v3 by id6,4322014,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.138,0.28,TRUE,30000054;79986418;499936032.106,0.017,,False +ip-172-31-31-147,1701270373,1701270728.0564108,groupby,G1_1e7_2e0_0_0,10000000,sum v1:v3 by id6,4322014,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.13,0.28,TRUE,30000054;79986418;499936032.106,0.016,,False +ip-172-31-31-147,1701270373,1701270730.4869003,groupby,G1_1e7_2e0_0_0,10000000,median v3 sd v3 by id4 id5,4,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.372,0.184,TRUE,199.97;115.489,0.001,,False +ip-172-31-31-147,1701270373,1701270732.9899151,groupby,G1_1e7_2e0_0_0,10000000,median v3 sd v3 by id4 id5,4,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.464,0.184,TRUE,199.97;115.489,0.001,,False +ip-172-31-31-147,1701270373,1701270736.615203,groupby,G1_1e7_2e0_0_0,10000000,max v1 - min v2 by id3,4323566,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,3.575,0.266,TRUE,-8263086,0.004,,False +ip-172-31-31-147,1701270373,1701270740.2500696,groupby,G1_1e7_2e0_0_0,10000000,max v1 - min v2 by id3,4323566,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,3.578,0.266,TRUE,-8263086,0.004,,False +ip-172-31-31-147,1701270373,1701271148.4950092,groupby,G1_1e7_2e0_0_0,10000000,largest two v3 by 
id6,7291480,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,408.177,0.26,TRUE,419079607.0,0.01,,False +ip-172-31-31-147,1701270373,1701271554.5266051,groupby,G1_1e7_2e0_0_0,10000000,largest two v3 by id6,7291480,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,405.965,0.313,TRUE,419079607.0,0.01,,False +ip-172-31-31-147,1701270373,1701271557.4972403,groupby,G1_1e7_2e0_0_0,10000000,regression v1 v2 by id2 id4,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.903,0.21,TRUE,0.0,0.0,,False +ip-172-31-31-147,1701270373,1701271560.4159844,groupby,G1_1e7_2e0_0_0,10000000,regression v1 v2 by id2 id4,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.871,0.21,TRUE,0.0,0.0,,False +ip-172-31-31-147,1701270373,1701271612.7523394,groupby,G1_1e7_2e0_0_0,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,52.265,1.436,TRUE,499936032.106;10000000,0.018,,False +ip-172-31-31-147,1701270373,1701271664.2474732,groupby,G1_1e7_2e0_0_0,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,51.396,1.478,TRUE,499936032.106;10000000,0.018,,False +ip-172-31-31-147,1701270373,1701271682.8363354,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.072,0.143,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701271682.9395196,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.07,0.143,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701271683.109577,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.132,0.145,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701271683.2747264,groupby,G1_1e7_1e2_0_1,10000000,sum v1 by 
id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.131,0.146,TRUE,29998789,0.0,,False +ip-172-31-31-147,1701270373,1701271683.6124837,groupby,G1_1e7_1e2_0_1,10000000,sum v1 mean v3 by id3,100000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.299,0.153,TRUE,29998789;4999719.622,0.0,,False +ip-172-31-31-147,1701270373,1701271683.9482932,groupby,G1_1e7_1e2_0_1,10000000,sum v1 mean v3 by id3,100000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.301,0.154,TRUE,29998789;4999719.622,0.0,,False +ip-172-31-31-147,1701270373,1701271684.0828857,groupby,G1_1e7_1e2_0_1,10000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.095,0.148,TRUE,299.988;799.894;4999.767,0.0,,False +ip-172-31-31-147,1701270373,1701271684.2127635,groupby,G1_1e7_1e2_0_1,10000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.096,0.148,TRUE,299.988;799.894;4999.767,0.0,,False +ip-172-31-31-147,1701270373,1701271684.4555683,groupby,G1_1e7_1e2_0_1,10000000,sum v1:v3 by id6,100000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.203,0.153,TRUE,29998789;79989360;499976651.408,0.001,,False +ip-172-31-31-147,1701270373,1701271684.6869807,groupby,G1_1e7_1e2_0_1,10000000,sum v1:v3 by id6,100000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.197,0.153,TRUE,29998789;79989360;499976651.408,0.001,,False +ip-172-31-31-147,1701270373,1701271687.291535,groupby,G1_1e7_1e2_0_1,10000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.56,0.155,TRUE,499920.14;288648.108,0.001,,False +ip-172-31-31-147,1701270373,1701271689.876269,groupby,G1_1e7_1e2_0_1,10000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.55,0.155,TRUE,499920.14;288648.108,0.001,,False 
+ip-172-31-31-147,1701270373,1701271690.187954,groupby,G1_1e7_1e2_0_1,10000000,max v1 - min v2 by id3,100000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.263,0.155,TRUE,399882,0.0,,False +ip-172-31-31-147,1701270373,1701271690.4806156,groupby,G1_1e7_1e2_0_1,10000000,max v1 - min v2 by id3,100000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.258,0.155,TRUE,399882,0.0,,False +ip-172-31-31-147,1701270373,1701271701.6402454,groupby,G1_1e7_1e2_0_1,10000000,largest two v3 by id6,200000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,11.121,0.169,TRUE,19700450.588,0.0,,False +ip-172-31-31-147,1701270373,1701271712.701363,groupby,G1_1e7_1e2_0_1,10000000,largest two v3 by id6,200000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,11.025,0.173,TRUE,19700450.588,0.0,,False +ip-172-31-31-147,1701270373,1701271714.142669,groupby,G1_1e7_1e2_0_1,10000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.401,0.159,TRUE,9.839,0.0,,False +ip-172-31-31-147,1701270373,1701271715.6366665,groupby,G1_1e7_1e2_0_1,10000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.457,0.159,TRUE,9.839,0.0,,False +ip-172-31-31-147,1701270373,1701271720.089303,groupby,G1_1e7_1e2_0_1,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.392,0.89,TRUE,499976651.408;10000000,0.019,,False +ip-172-31-31-147,1701270373,1701271724.488934,groupby,G1_1e7_1e2_0_1,10000000,sum v3 count by id1:id6,10000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.325,1.137,TRUE,499976651.408;10000000,0.018,,False +ip-172-31-31-147,1701270373,1701271765.6013942,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.206,0.146,TRUE,299991302,0.0,,False 
+ip-172-31-31-147,1701270373,1701271765.8140488,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.175,0.146,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701271766.2239163,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.367,0.148,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701271766.6120996,groupby,G1_1e8_1e2_0_0,100000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.349,0.148,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701271772.4396286,groupby,G1_1e8_1e2_0_0,100000000,sum v1 mean v3 by id3,1000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,5.777,0.193,TRUE,299991302;50001192.355,0.004,,False +ip-172-31-31-147,1701270373,1701271778.0148604,groupby,G1_1e8_1e2_0_0,100000000,sum v1 mean v3 by id3,1000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,5.53,0.252,TRUE,299991302;50001192.355,0.004,,False +ip-172-31-31-147,1701270373,1701271778.448956,groupby,G1_1e8_1e2_0_0,100000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.377,0.253,TRUE,299.991;799.978;5000.104,0.001,,False +ip-172-31-31-147,1701270373,1701271778.7874477,groupby,G1_1e8_1e2_0_0,100000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.295,0.253,TRUE,299.991;799.978;5000.104,0.0,,False +ip-172-31-31-147,1701270373,1701271781.9041739,groupby,G1_1e8_1e2_0_0,100000000,sum v1:v3 by id6,1000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,3.065,0.253,TRUE,299991302;799978221;5000103937.772,0.004,,False +ip-172-31-31-147,1701270373,1701271785.090914,groupby,G1_1e8_1e2_0_0,100000000,sum v1:v3 by 
id6,1000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,3.139,0.253,TRUE,299991302;799978221;5000103937.772,0.004,,False +ip-172-31-31-147,1701270373,1701271789.215396,groupby,G1_1e8_1e2_0_0,100000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.072,0.254,TRUE,500019.998;288668.357,0.001,,False +ip-172-31-31-147,1701270373,1701271793.481325,groupby,G1_1e8_1e2_0_0,100000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.217,0.255,TRUE,500019.998;288668.357,0.002,,False +ip-172-31-31-147,1701270373,1701271798.4848406,groupby,G1_1e8_1e2_0_0,100000000,max v1 - min v2 by id3,1000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.932,0.27,TRUE,3998729,0.001,,False +ip-172-31-31-147,1701270373,1701271803.1239278,groupby,G1_1e8_1e2_0_0,100000000,max v1 - min v2 by id3,1000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.587,0.27,TRUE,3998729,0.002,,False +ip-172-31-31-147,1701270373,1701271866.7611928,groupby,G1_1e8_1e2_0_0,100000000,largest two v3 by id6,2000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,63.586,0.419,TRUE,196996660.391,0.003,,False +ip-172-31-31-147,1701270373,1701271933.098539,groupby,G1_1e8_1e2_0_0,100000000,largest two v3 by id6,2000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,66.262,0.422,TRUE,196996660.391,0.003,,False +ip-172-31-31-147,1701270373,1701271939.001758,groupby,G1_1e8_1e2_0_0,100000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,5.823,0.249,TRUE,1.007,0.0,,False +ip-172-31-31-147,1701270373,1701271944.2145886,groupby,G1_1e8_1e2_0_0,100000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,5.156,0.251,TRUE,1.007,0.0,,False 
+ip-172-31-31-147,1701270373,1701272135.447855,groupby,G1_1e8_1e2_0_0,100000000,sum v3 count by id1:id6,100000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,190.979,12.119,TRUE,5000103937.772;100000000,0.178,,False +ip-172-31-31-147,1701270373,1701272321.6028104,groupby,G1_1e8_1e2_0_0,100000000,sum v3 count by id1:id6,100000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,185.667,12.438,TRUE,5000103937.772;100000000,0.171,,False +ip-172-31-31-147,1701270373,1701272347.1373367,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.199,0.145,TRUE,300012466,0.0,,False +ip-172-31-31-147,1701270373,1701272347.352836,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.173,0.145,TRUE,300012466,0.0,,False +ip-172-31-31-147,1701270373,1701272347.696981,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.303,0.145,TRUE,300012466,0.0,,False +ip-172-31-31-147,1701270373,1701272348.0351849,groupby,G1_1e8_1e1_0_0,100000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.3,0.146,TRUE,300012466,0.0,,False +ip-172-31-31-147,1701270373,1701272380.678586,groupby,G1_1e8_1e1_0_0,100000000,sum v1 mean v3 by id3,9999602,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,32.576,0.446,TRUE,300012466;499941400.876,0.023,,False +ip-172-31-31-147,1701270373,1701272412.917558,groupby,G1_1e8_1e1_0_0,100000000,sum v1 mean v3 by id3,9999602,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,32.137,0.448,TRUE,300012466;499941400.876,0.022,,False +ip-172-31-31-147,1701270373,1701272413.4411626,groupby,G1_1e8_1e1_0_0,100000000,mean v1:v3 by id4,10,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.44,0.159,TRUE,30.001;80.008;499.958,0.0,,False 
+ip-172-31-31-147,1701270373,1701272413.803133,groupby,G1_1e8_1e1_0_0,100000000,mean v1:v3 by id4,10,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.319,0.159,TRUE,30.001;80.008;499.958,0.0,,False +ip-172-31-31-147,1701270373,1701272431.17442,groupby,G1_1e8_1e1_0_0,100000000,sum v1:v3 by id6,9999538,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,17.295,0.458,TRUE,300012466;800079612;4999575436.012,0.028,,False +ip-172-31-31-147,1701270373,1701272448.5870707,groupby,G1_1e8_1e1_0_0,100000000,sum v1:v3 by id6,9999538,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,17.314,0.46,TRUE,300012466;800079612;4999575436.012,0.027,,False +ip-172-31-31-147,1701270373,1701272453.278708,groupby,G1_1e8_1e1_0_0,100000000,median v3 sd v3 by id4 id5,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.626,0.162,TRUE,4999.826;2886.819,0.001,,False +ip-172-31-31-147,1701270373,1701272457.872118,groupby,G1_1e8_1e1_0_0,100000000,median v3 sd v3 by id4 id5,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.534,0.163,TRUE,4999.826;2886.819,0.001,,False +ip-172-31-31-147,1701270373,1701272488.1121628,groupby,G1_1e8_1e1_0_0,100000000,max v1 - min v2 by id3,9999602,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,30.176,0.388,TRUE,27890093,0.007,,False +ip-172-31-31-147,1701270373,1701272515.124372,groupby,G1_1e8_1e1_0_0,100000000,max v1 - min v2 by id3,9999602,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,26.928,0.389,TRUE,27890093,0.007,,False +ip-172-31-31-147,1701270373,1701273055.48205,groupby,G1_1e8_1e1_0_0,100000000,largest two v3 by id6,19994518,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,540.245,0.959,TRUE,1700010092.167,0.024,,False +ip-172-31-31-147,1701270373,1701273588.4104173,groupby,G1_1e8_1e1_0_0,100000000,largest two v3 by 
id6,19994518,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,532.832,1.042,TRUE,1700010092.167,0.024,,False +ip-172-31-31-147,1701270373,1701273593.750396,groupby,G1_1e8_1e1_0_0,100000000,regression v1 v2 by id2 id4,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,5.261,0.745,TRUE,0.0,0.0,,False +ip-172-31-31-147,1701270373,1701273598.922429,groupby,G1_1e8_1e1_0_0,100000000,regression v1 v2 by id2 id4,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,5.113,0.747,TRUE,0.0,0.0,,False +ip-172-31-31-147,1701270373,1701279562.9120233,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.184,0.147,TRUE,299988126,0.0,,False +ip-172-31-31-147,1701270373,1701279563.1294274,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.178,0.147,TRUE,299988126,0.0,,False +ip-172-31-31-147,1701270373,1701279563.4639542,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.295,0.148,TRUE,299988126,0.0,,False +ip-172-31-31-147,1701270373,1701279563.7826111,groupby,G1_1e8_2e0_0_0,100000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.283,0.148,TRUE,299988126,0.0,,False +ip-172-31-31-147,1701270373,1701279627.0953436,groupby,G1_1e8_2e0_0_0,100000000,sum v1 mean v3 by id3,43233017,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,63.135,1.44,TRUE,299988126;2161776167.331,0.134,,False +ip-172-31-31-147,1701270373,1701279690.6538594,groupby,G1_1e8_2e0_0_0,100000000,sum v1 mean v3 by id3,43233017,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,63.251,1.442,TRUE,299988126;2161776167.331,0.131,,False +ip-172-31-31-147,1701270373,1701279691.9334166,groupby,G1_1e8_2e0_0_0,100000000,mean v1:v3 by 
id4,2,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.091,0.153,TRUE,6.0;15.999;100.001,0.0,,False +ip-172-31-31-147,1701270373,1701279692.2921662,groupby,G1_1e8_2e0_0_0,100000000,mean v1:v3 by id4,2,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.319,0.153,TRUE,6.0;15.999;100.001,0.0,,False +ip-172-31-31-147,1701270373,1701279730.054133,groupby,G1_1e8_2e0_0_0,100000000,sum v1:v3 by id6,43238066,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,37.547,1.443,TRUE,299988126;799952220;5000051370.457,0.17,,False +ip-172-31-31-147,1701270373,1701279767.4784524,groupby,G1_1e8_2e0_0_0,100000000,sum v1:v3 by id6,43238066,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,37.112,1.443,TRUE,299988126;799952220;5000051370.457,0.17,,False +ip-172-31-31-147,1701270373,1701279774.8029108,groupby,G1_1e8_2e0_0_0,100000000,median v3 sd v3 by id4 id5,4,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,7.179,0.154,TRUE,199.998;115.468,0.001,,False +ip-172-31-31-147,1701270373,1701279781.85363,groupby,G1_1e8_2e0_0_0,100000000,median v3 sd v3 by id4 id5,4,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,7.009,0.155,TRUE,199.998;115.468,0.001,,False +ip-172-31-31-147,1701270373,1701279834.7651896,groupby,G1_1e8_2e0_0_0,100000000,max v1 - min v2 by id3,43233017,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,52.82,1.125,TRUE,-82715914,0.04,,False +ip-172-31-31-147,1701270373,1701279889.3444054,groupby,G1_1e8_2e0_0_0,100000000,max v1 - min v2 by id3,43233017,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,54.378,1.126,TRUE,-82715914,0.041,,False +ip-172-31-31-147,1701270373,1701286797.1623476,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.192,0.146,TRUE,299991302,0.0,,False 
+ip-172-31-31-147,1701270373,1701286797.3755906,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.173,0.146,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701286797.7259943,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.307,0.147,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701286798.0494366,groupby,G1_1e8_1e2_0_1,100000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.286,0.147,TRUE,299991302,0.0,,False +ip-172-31-31-147,1701270373,1701286803.5212162,groupby,G1_1e8_1e2_0_1,100000000,sum v1 mean v3 by id3,1000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,5.423,0.192,TRUE,299991302;50001192.355,0.003,,False +ip-172-31-31-147,1701270373,1701286809.0156846,groupby,G1_1e8_1e2_0_1,100000000,sum v1 mean v3 by id3,1000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,5.451,0.252,TRUE,299991302;50001192.355,0.003,,False +ip-172-31-31-147,1701270373,1701286809.4124792,groupby,G1_1e8_1e2_0_1,100000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,0.341,0.253,TRUE,299.991;799.978;5000.104,0.0,,False +ip-172-31-31-147,1701270373,1701286809.7314208,groupby,G1_1e8_1e2_0_1,100000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,0.278,0.253,TRUE,299.991;799.978;5000.104,0.0,,False +ip-172-31-31-147,1701270373,1701286812.7587445,groupby,G1_1e8_1e2_0_1,100000000,sum v1:v3 by id6,1000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.977,0.254,TRUE,299991302;799978221;5000103937.772,0.004,,False +ip-172-31-31-147,1701270373,1701286815.7217731,groupby,G1_1e8_1e2_0_1,100000000,sum v1:v3 by 
id6,1000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.917,0.254,TRUE,299991302;799978221;5000103937.772,0.004,,False +ip-172-31-31-147,1701270373,1701286819.8786478,groupby,G1_1e8_1e2_0_1,100000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.109,0.255,TRUE,500019.998;288668.357,0.001,,False +ip-172-31-31-147,1701270373,1701286823.9602194,groupby,G1_1e8_1e2_0_1,100000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.035,0.255,TRUE,500019.998;288668.357,0.001,,False +ip-172-31-31-147,1701270373,1701286828.7490864,groupby,G1_1e8_1e2_0_1,100000000,max v1 - min v2 by id3,1000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.719,0.269,TRUE,3998729,0.001,,False +ip-172-31-31-147,1701270373,1701286833.2757466,groupby,G1_1e8_1e2_0_1,100000000,max v1 - min v2 by id3,1000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.48,0.269,TRUE,3998729,0.001,,False +ip-172-31-31-147,1701270373,1701286899.933484,groupby,G1_1e8_1e2_0_1,100000000,largest two v3 by id6,2000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,66.59,0.416,TRUE,196996660.391,0.003,,False +ip-172-31-31-147,1701270373,1701286964.0528538,groupby,G1_1e8_1e2_0_1,100000000,largest two v3 by id6,2000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,64.045,0.418,TRUE,196996660.391,0.003,,False +ip-172-31-31-147,1701270373,1701286968.5076516,groupby,G1_1e8_1e2_0_1,100000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,4.375,0.246,TRUE,1.007,0.0,,False +ip-172-31-31-147,1701270373,1701286972.9494538,groupby,G1_1e8_1e2_0_1,100000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,4.372,0.248,TRUE,1.007,0.0,,False 
+ip-172-31-31-147,1701270373,1701287165.7388566,groupby,G1_1e8_1e2_0_1,100000000,sum v3 count by id1:id6,100000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,192.555,11.876,TRUE,5000103937.772;100000000,0.172,,False +ip-172-31-31-147,1701270373,1701287349.8430133,groupby,G1_1e8_1e2_0_1,100000000,sum v3 count by id1:id6,100000000,8,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,183.614,12.752,TRUE,5000103937.772;100000000,0.173,,False +ip-172-31-31-147,1701270373,1701287441.070584,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.526,0.159,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701287442.2304997,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.104,0.162,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701287444.9488778,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.662,0.166,TRUE,2999924714,0.001,,False +ip-172-31-31-147,1701270373,1701287447.4995255,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.481,0.168,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701287628.253746,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 mean v3 by id3,10000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,180.651,0.472,TRUE,2999924714;499986249.525,0.031,,False +ip-172-31-31-147,1701270373,1701287868.545322,groupby,G1_1e9_1e2_0_0,1000000000,sum v1 mean v3 by id3,10000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,240.14,0.477,TRUE,2999924714;499986249.525,0.032,,False +ip-172-31-31-147,1701270373,1701287878.5984879,groupby,G1_1e9_1e2_0_0,1000000000,mean v1:v3 by 
id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,9.912,0.191,TRUE,299.992;799.999;4999.87,0.0,,False +ip-172-31-31-147,1701270373,1701287881.5974123,groupby,G1_1e9_1e2_0_0,1000000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.909,0.195,TRUE,299.992;799.999;4999.87,0.0,,False +ip-172-31-31-147,1701270373,1701287933.6493325,groupby,G1_1e9_1e2_0_0,1000000000,sum v1:v3 by id6,10000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,51.903,0.497,TRUE,2999924714;7999992854;49998699477.823,0.038,,False +ip-172-31-31-147,1701270373,1701287984.4997504,groupby,G1_1e9_1e2_0_0,1000000000,sum v1:v3 by id6,10000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,50.687,0.501,TRUE,2999924714;7999992854;49998699477.823,0.029,,False +ip-172-31-31-147,1701270373,1701288052.1076963,groupby,G1_1e9_1e2_0_0,1000000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,67.441,0.216,TRUE,499981.751;288669.152,0.002,,False +ip-172-31-31-147,1701270373,1701288117.1575553,groupby,G1_1e9_1e2_0_0,1000000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,64.918,0.232,TRUE,499981.751;288669.152,0.001,,False +ip-172-31-31-147,1701270373,1701288286.0172052,groupby,G1_1e9_1e2_0_0,1000000000,max v1 - min v2 by id3,10000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,168.721,0.458,TRUE,39987226,0.01,,False +ip-172-31-31-147,1701270373,1701288452.3057954,groupby,G1_1e9_1e2_0_0,1000000000,max v1 - min v2 by id3,10000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,166.108,0.46,TRUE,39987226,0.01,,False +ip-172-31-31-147,1701270373,1701289103.1724594,groupby,G1_1e9_1e2_0_0,1000000000,largest two v3 by id6,20000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,650.675,1.21,TRUE,1970001789.633,0.024,,False 
+ip-172-31-31-147,1701270373,1701289732.0969014,groupby,G1_1e9_1e2_0_0,1000000000,largest two v3 by id6,20000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,628.72,1.199,TRUE,1970001789.633,0.024,,False +ip-172-31-31-147,1701270373,1701289950.0646155,groupby,G1_1e9_1e2_0_0,1000000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,217.772,0.72,TRUE,0.098,0.0,,False +ip-172-31-31-147,1701270373,1701290168.7458365,groupby,G1_1e9_1e2_0_0,1000000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,218.409,0.721,TRUE,0.098,0.0,,False +ip-172-31-31-147,1701270373,1701298256.1023448,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.598,0.16,TRUE,2999933732,0.0,,False +ip-172-31-31-147,1701270373,1701298257.3023472,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1,10,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.123,0.162,TRUE,2999933732,0.001,,False +ip-172-31-31-147,1701270373,1701298259.6293845,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.224,0.165,TRUE,2999933732,0.0,,False +ip-172-31-31-147,1701270373,1701298261.7867439,groupby,G1_1e9_1e1_0_0,1000000000,sum v1 by id1:id2,100,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.093,0.169,TRUE,2999933732,0.0,,False +ip-172-31-31-147,1701270373,1701309070.1026845,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.304,0.159,TRUE,2999997259,0.0,,False +ip-172-31-31-147,1701270373,1701309071.2744927,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1,2,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.112,0.162,TRUE,2999997259,0.0,,False 
+ip-172-31-31-147,1701270373,1701309073.4942474,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.149,0.165,TRUE,2999997259,0.0,,False +ip-172-31-31-147,1701270373,1701309075.7645607,groupby,G1_1e9_2e0_0_0,1000000000,sum v1 by id1:id2,4,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.202,0.168,TRUE,2999997259,0.0,,False +ip-172-31-31-147,1701270373,1701319887.266315,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,1.37,0.162,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701319888.4063828,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1,100,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,1.084,0.166,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701319890.917585,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,2.457,0.169,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701319893.3536305,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 by id1:id2,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.37,0.172,TRUE,2999924714,0.0,,False +ip-172-31-31-147,1701270373,1701320072.757093,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 mean v3 by id3,10000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,179.307,0.476,TRUE,2999924714;499986249.525,0.032,,False +ip-172-31-31-147,1701270373,1701320223.710797,groupby,G1_1e9_1e2_0_1,1000000000,sum v1 mean v3 by id3,10000000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,150.808,0.481,TRUE,2999924714;499986249.525,0.031,,False +ip-172-31-31-147,1701270373,1701320247.6839767,groupby,G1_1e9_1e2_0_1,1000000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,23.845,0.196,TRUE,299.992;799.999;4999.87,0.0,,False 
+ip-172-31-31-147,1701270373,1701320250.1565726,groupby,G1_1e9_1e2_0_1,1000000000,mean v1:v3 by id4,100,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,2.392,0.199,TRUE,299.992;799.999;4999.87,0.0,,False +ip-172-31-31-147,1701270373,1701320316.5739267,groupby,G1_1e9_1e2_0_1,1000000000,sum v1:v3 by id6,10000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,66.3,0.5,TRUE,2999924714;7999992854;49998699477.823,0.038,,False +ip-172-31-31-147,1701270373,1701320374.1152494,groupby,G1_1e9_1e2_0_1,1000000000,sum v1:v3 by id6,10000000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,57.394,0.503,TRUE,2999924714;7999992854;49998699477.823,0.028,,False +ip-172-31-31-147,1701270373,1701320442.3258793,groupby,G1_1e9_1e2_0_1,1000000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,68.084,0.219,TRUE,499981.751;288669.152,0.002,,False +ip-172-31-31-147,1701270373,1701320507.3586907,groupby,G1_1e9_1e2_0_1,1000000000,median v3 sd v3 by id4 id5,10000,4,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,64.9,0.233,TRUE,499981.751;288669.152,0.002,,False +ip-172-31-31-147,1701270373,1701320627.4618857,groupby,G1_1e9_1e2_0_1,1000000000,max v1 - min v2 by id3,10000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,119.926,0.456,TRUE,39987226,0.007,,False +ip-172-31-31-147,1701270373,1701320770.1905253,groupby,G1_1e9_1e2_0_1,1000000000,max v1 - min v2 by id3,10000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,142.538,0.458,TRUE,39987226,0.007,,False +ip-172-31-31-147,1701270373,1701321406.5192816,groupby,G1_1e9_1e2_0_1,1000000000,largest two v3 by id6,20000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,636.128,1.21,TRUE,1970001789.633,0.023,,False +ip-172-31-31-147,1701270373,1701322023.264214,groupby,G1_1e9_1e2_0_1,1000000000,largest two v3 by 
id6,20000000,2,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,616.519,1.183,TRUE,1970001789.633,0.024,,False +ip-172-31-31-147,1701270373,1701322228.1617336,groupby,G1_1e9_1e2_0_1,1000000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,1,204.648,0.889,TRUE,0.098,0.0,,False +ip-172-31-31-147,1701270373,1701322434.9540725,groupby,G1_1e9_1e2_0_1,1000000000,regression v1 v2 by id2 id4,10000,3,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.groupby,2,206.537,0.888,TRUE,0.098,0.0,,False +ip-172-31-31-147,1701270373,1701330673.969423,join,J1_1e7_NA_0_0,10000000,small inner on int,8998860,9,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,0.418,3.259,TRUE,450015153.577;347720187.395,0.023,,False +ip-172-31-31-147,1701270373,1701330674.3800976,join,J1_1e7_NA_0_0,10000000,small inner on int,8998860,9,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,0.367,3.428,TRUE,450015153.577;347720187.395,0.023,,False +ip-172-31-31-147,1701270373,1701330674.964119,join,J1_1e7_NA_0_0,10000000,medium inner on int,8998412,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,0.526,3.499,TRUE,449954076.026;449999844.937,0.022,,False +ip-172-31-31-147,1701270373,1701330675.514599,join,J1_1e7_NA_0_0,10000000,medium inner on int,8998412,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,0.508,3.481,TRUE,449954076.026;449999844.937,0.021,,False +ip-172-31-31-147,1701270373,1701330676.0076911,join,J1_1e7_NA_0_0,10000000,medium outer on int,10000000,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,0.402,3.531,TRUE,500043740.752;449999844.937,0.058,,False +ip-172-31-31-147,1701270373,1701330676.4824815,join,J1_1e7_NA_0_0,10000000,medium outer on int,10000000,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,0.395,3.456,TRUE,500043740.752;449999844.937,0.06,,False 
+ip-172-31-31-147,1701270373,1701330677.1112347,join,J1_1e7_NA_0_0,10000000,medium inner on factor,8998412,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,0.573,3.774,TRUE,449954076.026;449999844.937,0.021,,False +ip-172-31-31-147,1701270373,1701330677.7000687,join,J1_1e7_NA_0_0,10000000,medium inner on factor,8998412,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,0.548,3.842,TRUE,449954076.026;449999844.937,0.021,,False +ip-172-31-31-147,1701270373,1701330681.05543,join,J1_1e7_NA_0_0,10000000,big inner on int,9000000,13,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,3.301,6.838,TRUE,450032091.841;449860428.616,0.021,,False +ip-172-31-31-147,1701270373,1701330684.104251,join,J1_1e7_NA_0_0,10000000,big inner on int,9000000,13,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,3.007,7.982,TRUE,450032091.841;449860428.616,0.021,,False +ip-172-31-31-147,1701270373,1701330842.793844,join,J1_1e8_NA_0_0,100000000,small inner on int,89997128,9,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,3.189,29.063,TRUE,4499430832.39;4388703871.229,0.239,,False +ip-172-31-31-147,1701270373,1701330845.8377287,join,J1_1e8_NA_0_0,100000000,small inner on int,89997128,9,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,2.793,30.591,TRUE,4499430832.39;4388703871.229,0.221,,False +ip-172-31-31-147,1701270373,1701330849.3190267,join,J1_1e8_NA_0_0,100000000,medium inner on int,89995511,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,3.211,32.915,TRUE,4499423746.365;4507751463.252,0.22,,False +ip-172-31-31-147,1701270373,1701330852.6214228,join,J1_1e8_NA_0_0,100000000,medium inner on int,89995511,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,3.039,33.698,TRUE,4499423746.365;4507751463.252,0.228,,False +ip-172-31-31-147,1701270373,1701330856.895314,join,J1_1e8_NA_0_0,100000000,medium outer on 
int,100000000,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,3.425,31.78,TRUE,4999542477.919;4507751463.252,0.794,,False +ip-172-31-31-147,1701270373,1701330861.6940563,join,J1_1e8_NA_0_0,100000000,medium outer on int,100000000,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,3.974,32.036,TRUE,4999542477.919;4507751463.252,0.77,,False +ip-172-31-31-147,1701270373,1701330865.5744674,join,J1_1e8_NA_0_0,100000000,medium inner on factor,89995511,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,3.599,36.495,TRUE,4499423746.365;4507751463.252,0.223,,False +ip-172-31-31-147,1701270373,1701330869.3830082,join,J1_1e8_NA_0_0,100000000,medium inner on factor,89995511,11,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,3.544,36.732,TRUE,4499423746.365;4507751463.252,0.222,,False +ip-172-31-31-147,1701270373,1701330945.315101,join,J1_1e8_NA_0_0,100000000,big inner on int,90000000,13,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,1,75.662,58.546,TRUE,4499590098.078;4499913694.243,0.21,,False +ip-172-31-31-147,1701270373,1701331022.3666933,join,J1_1e8_NA_0_0,100000000,big inner on int,90000000,13,dask,2023.10.0,3a8f8248884f8c69b76d610d8a44d9b6501d7a7a,.merge,2,76.789,63.037,TRUE,4499590098.078;4499913694.243,0.21,,False +