From c2a694a0480cbcbe051902c6ae97bb6dd4519239 Mon Sep 17 00:00:00 2001 From: Nikolas Siccha Date: Wed, 15 Oct 2025 13:14:55 +0200 Subject: [PATCH] update docs --- docs/_quarto.yml | 10 +- docs/slic/crowdsource.qmd | 286 ++++++++++++++++++++++++++++++++++++++ docs/slic/golf.qmd | 14 +- docs/slic/isba-2024.qmd | 28 ++-- 4 files changed, 326 insertions(+), 12 deletions(-) create mode 100644 docs/slic/crowdsource.qmd diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 4f41c9c..96a5a47 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -8,12 +8,16 @@ website: left: - text: "Overview" href: index.qmd - - text: "Performance" - href: performance.qmd - text: "Implementations" href: implementations.qmd - text: "@slic" href: slic/ + - text: "Golf" + href: slic/golf.qmd + - text: "ISBA-2024" + href: slic/isba-2024.qmd + - text: "Crowdsourcing" + href: slic/crowdsource.qmd - icon: github href: https://github.com/nsiccha/StanBlocks.jl format: @@ -21,7 +25,7 @@ format: page-layout: full toc: true code-tools: true - code-overflow: wrap + code-overflow: scroll code-line-numbers: true include-in-header: text: | diff --git a/docs/slic/crowdsource.qmd b/docs/slic/crowdsource.qmd new file mode 100644 index 0000000..ad4cd38 --- /dev/null +++ b/docs/slic/crowdsource.qmd @@ -0,0 +1,286 @@ +--- +title: "Reimplementing the Stan models from https://github.com/seongwoohan/crowdsource-computo-bayes" +--- + +The full Julia code for this notebook can be accessed via the top right corner (` Code`). + +The Julia packages needed to reproduce this document are [`StanBlocks.jl`](https://github.com/nsiccha/StanBlocks.jl) (for the model generation) and [`QuartoComponents.jl`](https://github.com/nsiccha/QuartoComponents.jl) (for the "pretty" printing). +Both packages have to be installed from the latest `main` branch (as of Oct 14th 2025). + +## StanBlocks.jl implementation + +The function and model definitions below make use of + +* higher-order user defined functions, +* variadic functions, +* "Post-hoc model adjustment via model component replacement and splicing". + +For an explanation of the first two, see [the ISBA 2024 example](isba-2024.qmd). + +### Post-hoc model adjustment via model component replacement and splicing + +TO DO: elaborate + + +## Full Julia + StanBlocks.jl code to define the models + +The following generates all models: + +```julia +using StanBlocks + +@deffun begin + increment_at(rv0, idxs::int[n], arg1) = begin + rv = rv0 + for i in 1:n + idx = idxs[i] + rv[idx] += arg1[idx] + end + rv + end + increment_at(f, rv0, idxs::int[n], arg1, arg2) = begin + rv = rv0 + for i in 1:n + idx = idxs[i] + rv[idx] += f(arg1[idx], arg2[idx]) + end + rv + end + vote_count(rating, item, rater, I, J) = increment_at( + rep_array(0, J + 1), increment_at(rep_array(1, I), item, rating), rep_array(1, I) + ) + rater_count(rating, rater, J) = increment_at(rep_array(0, J), rater, rating) + lte_sim_rng(x, y) = if x == y + bernoulli_rng(.5) + else + x < y + end + lte_sim_rng(x::anything[_], y) = jbroadcasted(lte_sim_rng, x, y) + pos_probs(lambda, delta, alpha_sens, beta_, item, rater) = ( + lambda[item] + (1 - lambda[item]) * inv_logit(delta[item] * (alpha_sens[rater] - beta_[item])) + ) + neg_probs(lambda, delta, alpha_spec, beta_, item, rater) = ( + (1 - lambda[item]) * inv_logit(-delta[item] * (alpha_spec[rater] - beta_[item])) + ) + no_good_name_lpmfs(rating, I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) = jbroadcasted( + log_sum_exp, + increment_at(bernoulli_lpmf, rep_vector(log(pi_), I), item, rating, pos_probs(lambda, delta, alpha_sens, beta_, item, rater)), + increment_at(bernoulli_lpmf, rep_vector(log1m(pi_), I), item, rating, neg_probs(lambda, delta, alpha_spec, beta_, item, rater)) + ) + no_good_name_lpmf(args...) = sum(no_good_name_lpmfs(args...)) + no_good_name_rng(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) = begin + z_sim = to_vector(bernoulli_rng(rep_vector(pi_, I))) + bernoulli_rng( + z_sim[item] .* pos_probs(lambda, delta, alpha_sens, beta_, item, rater) + + (1 - z_sim[item]) .* neg_probs(lambda, delta, alpha_spec, beta_, item, rater) + ) + end + StanBlocks.stan.log_sum_exp(::real, ::real)::real +end + +mock_data = (;I=1,J=1,item=[1],rater=[1],rating=[1]) + +full = @slic mock_data begin + votes_data = vote_count(rating, item, rater, I, J) + rater_data = rater_count(rating, rater, J) + pi_ ~ beta(2, 2) + alpha_spec ~ normal(2, 2; n=J) + alpha_sens ~ normal(1, 2; n=J, lower=-alpha_spec) + beta_ ~ normal(0, 1; n=I) + delta ~ lognormal(0, 0.25; n=I) + lambda ~ beta(2, 2; n=I) + rating ~ no_good_name(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) + + rating_sim = no_good_name_rng(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) + votes_sim = vote_count(rating_sim, item, rater, I, J) + votes_sim_lt_data = lte_sim_rng(votes_sim, votes_data) + rater_sim = rater_count(rating_sim, rater, J) + rater_sim_lt_data = lte_sim_rng(rater_sim, rater_data) +end + +a_transform = quote + lambda = rep_vector(0, I) +end +b_transform = quote + delta = rep_vector(1, I) +end +c_transform = quote + beta_ = rep_vector(0, I) +end +d_transform = quote + alpha_acc ~ normal(1, 2; n=J, lower=0) + alpha_sens = alpha_acc + alpha_spec = alpha_acc +end +de_transform = quote + alpha_acc_scalar ~ normal(1, 2; lower=0) + alpha_sens = rep_vector(alpha_acc_scalar, J) + alpha_spec = rep_vector(alpha_acc_scalar, J) +end +e_transform = quote + alpha_spec_scalar ~ normal(2, 2) + alpha_sens_scalar ~ normal(1, 2; lower=-alpha_spec_scalar) + alpha_spec = rep_vector(alpha_spec_scalar, J) + alpha_sens = rep_vector(alpha_sens_scalar, J) +end + +a = full(a_transform) +ab = a(b_transform) +abc = ab(c_transform) +abcd = abc(d_transform) +abcde = abc(de_transform) +abce = abc(e_transform) +abd = ab(d_transform) +abde = ab(de_transform) +ac = a(c_transform) +acd = ac(d_transform) +ad = a(d_transform) +b = full(b_transform) +bc = b(c_transform) +bcd = bc(d_transform) +bd = b(d_transform) +c = full(c_transform) +cd_ = c(d_transform) +d = full(d_transform) + +posteriors = (; + full, + a, ab, abc, abcd, abcde, abce, abd, abde, ac, acd, ad, + b, bc, bcd, bd, + c, cd=cd_, + d, +) +``` + +## Generated Stan code + + +```{julia} +using StanBlocks, QuartoComponents + +@deffun begin + increment_at(rv0, idxs::int[n], arg1) = begin + rv = rv0 + for i in 1:n + idx = idxs[i] + rv[idx] += arg1[idx] + end + rv + end + increment_at(f, rv0, idxs::int[n], arg1, arg2) = begin + rv = rv0 + for i in 1:n + idx = idxs[i] + rv[idx] += f(arg1[idx], arg2[idx]) + end + rv + end + vote_count(rating, item, rater, I, J) = increment_at( + rep_array(0, J + 1), increment_at(rep_array(1, I), item, rating), rep_array(1, I) + ) + rater_count(rating, rater, J) = increment_at(rep_array(0, J), rater, rating) + lte_sim_rng(x, y) = if x == y + bernoulli_rng(.5) + else + x < y + end + lte_sim_rng(x::anything[_], y) = jbroadcasted(lte_sim_rng, x, y) + pos_probs(lambda, delta, alpha_sens, beta_, item, rater) = ( + lambda[item] + (1 - lambda[item]) * inv_logit(delta[item] * (alpha_sens[rater] - beta_[item])) + ) + neg_probs(lambda, delta, alpha_spec, beta_, item, rater) = ( + (1 - lambda[item]) * inv_logit(-delta[item] * (alpha_spec[rater] - beta_[item])) + ) + no_good_name_lpmfs(rating, I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) = jbroadcasted( + log_sum_exp, + increment_at(bernoulli_lpmf, rep_vector(log(pi_), I), item, rating, pos_probs(lambda, delta, alpha_sens, beta_, item, rater)), + increment_at(bernoulli_lpmf, rep_vector(log1m(pi_), I), item, rating, neg_probs(lambda, delta, alpha_spec, beta_, item, rater)) + ) + no_good_name_lpmf(args...) = sum(no_good_name_lpmfs(args...)) + no_good_name_rng(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) = begin + z_sim = to_vector(bernoulli_rng(rep_vector(pi_, I))) + bernoulli_rng( + z_sim[item] .* pos_probs(lambda, delta, alpha_sens, beta_, item, rater) + + (1 - z_sim[item]) .* neg_probs(lambda, delta, alpha_spec, beta_, item, rater) + ) + end + StanBlocks.stan.log_sum_exp(::real, ::real)::real +end + +mock_data = (;I=1,J=1,item=[1],rater=[1],rating=[1]) + +full = @slic mock_data begin + votes_data = vote_count(rating, item, rater, I, J) + rater_data = rater_count(rating, rater, J) + pi_ ~ beta(2, 2) + alpha_spec ~ normal(2, 2; n=J) + alpha_sens ~ normal(1, 2; n=J, lower=-alpha_spec) + beta_ ~ normal(0, 1; n=I) + delta ~ lognormal(0, 0.25; n=I) + lambda ~ beta(2, 2; n=I) + rating ~ no_good_name(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) + + rating_sim = no_good_name_rng(I, item, rater, pi_, alpha_spec, alpha_sens, beta_, delta, lambda) + votes_sim = vote_count(rating_sim, item, rater, I, J) + votes_sim_lt_data = lte_sim_rng(votes_sim, votes_data) + rater_sim = rater_count(rating_sim, rater, J) + rater_sim_lt_data = lte_sim_rng(rater_sim, rater_data) +end + +a_transform = quote + lambda = rep_vector(0, I) +end +b_transform = quote + delta = rep_vector(1, I) +end +c_transform = quote + beta_ = rep_vector(0, I) +end +d_transform = quote + alpha_acc ~ normal(1, 2; n=J, lower=0) + alpha_sens = alpha_acc + alpha_spec = alpha_acc +end +de_transform = quote + alpha_acc_scalar ~ normal(1, 2; lower=0) + alpha_sens = rep_vector(alpha_acc_scalar, J) + alpha_spec = rep_vector(alpha_acc_scalar, J) +end +e_transform = quote + alpha_spec_scalar ~ normal(2, 2) + alpha_sens_scalar ~ normal(1, 2; lower=-alpha_spec_scalar) + alpha_spec = rep_vector(alpha_spec_scalar, J) + alpha_sens = rep_vector(alpha_sens_scalar, J) +end + +a = full(a_transform) +ab = a(b_transform) +abc = ab(c_transform) +abcd = abc(d_transform) +abcde = abc(de_transform) +abce = abc(e_transform) +abd = ab(d_transform) +abde = ab(de_transform) +ac = a(c_transform) +acd = ac(d_transform) +ad = a(d_transform) +b = full(b_transform) +bc = b(c_transform) +bcd = bc(d_transform) +bd = b(d_transform) +c = full(c_transform) +cd_ = c(d_transform) +d = full(d_transform) + +posteriors = (; + full, + a, ab, abc, abcd, abcde, abce, abd, abde, ac, acd, ad, + b, bc, bcd, bd, + c, cd=cd_, + d, +) + + map(posteriors) do posterior + QuartoComponents.Code("stan", stan_code(posterior)) +end |> QuartoComponents.Tabset +``` \ No newline at end of file diff --git a/docs/slic/golf.qmd b/docs/slic/golf.qmd index 8cdb0aa..5f3d11c 100644 --- a/docs/slic/golf.qmd +++ b/docs/slic/golf.qmd @@ -1,3 +1,15 @@ +--- +title: "Golf models reimplementation" +--- + +TO DO: elaborate + +## StanBlocks.jl implementation + +TO DO: elaborate + +## Generated Stan models + ```{julia} using StanBlocks, QuartoComponents @@ -56,7 +68,7 @@ golf_angle_distance_4 = golf_angle_distance_2(quote distance_tolerance ~ normal(3, 5; lower=0) end) -map(stan_code, (; +map(x->QuartoComponents.Code("stan", stan_code(x)), (; golf_logistic=golf_logistic(;dataset1...), golf_angle=golf_angle(;r, R, dataset1...), golf_angle_distance_2=golf_angle_distance_2(;r, R, overshot, distance_tolerance, dataset12...), diff --git a/docs/slic/isba-2024.qmd b/docs/slic/isba-2024.qmd index aa413b2..ca51b99 100644 --- a/docs/slic/isba-2024.qmd +++ b/docs/slic/isba-2024.qmd @@ -1,8 +1,5 @@ --- title: "Reimplementing the Stan models from https://github.com/bob-carpenter/pcr-sensitivity-vs-time" -format: - html: - code-overflow: scroll --- The full Julia code for this notebook can be accessed via the top right corner (` Code`). @@ -41,8 +38,8 @@ In the below models, `my_bernoulli_lpmfs` will be called with the following sign ```julia my_bernoulli_lpmfs(y::int[n], f::typeof(logit), theta::vector[n]) my_bernoulli_lpmfs(y::int[n], f::typeof(log), theta::vector[n]) -my_bernoulli_lpmfs(y::int, f::typeof(logit), theta::vector) -my_bernoulli_lpmfs(y::int, f::typeof(log), theta::vector) +my_bernoulli_lpmfs(y::int, f::typeof(logit), theta::real) +my_bernoulli_lpmfs(y::int, f::typeof(log), theta::real) ``` all of which will be covered by the variadic function definitions at the beginning of this section. @@ -69,6 +66,7 @@ my_bernoulli_lpmf(y, ::typeof(logit), theta) = bernoulli_logit_lpmf(y, theta) my_bernoulli_lpmf(y, ::typeof(log), theta) = bernoulli_lpmf(y, exp(theta)) ``` which gets used to make the likelihood implementation depend on the link function of the model, allowing us + * to forward `y` and `theta` to `bernoulli_logit_lpmf(y, theta)`, or * to forward `y` and `theta` to `bernoulli_lpmf(y, exp(theta))`. @@ -85,6 +83,8 @@ The following reproduces all of the code necessary to implement the 2x5 model ma ```julia using StanBlocks +import StanBlocks.stan: logit + @deffun begin "Needed for cross validation" my_bernoulli_lpmfs(y::int[n], args...) = jbroadcasted(my_bernoulli_lpmfs, y, args...) @@ -185,12 +185,13 @@ bases = (; theta ~ regression(;link_f, t) end), regression_mix=base_model(quote - theta ~ regression(;link_f, t) + theta ~ regression_mix(;link_f, t) y ~ bernoulli(theta) end) ) link_fs = (;logit, log) +# `posteriors` will be a (nested) named tuple - accessing e.g. the `Hetero (log)` model works via `posteriors.hetero.log` posteriors = map(bases) do base map(link_fs) do link_f base(;link_f) @@ -205,9 +206,19 @@ The top level (with keys `hetero`, `rw1`, `rw2`, `regression`, and `regression_m combines with the link function in the second level (with keys `logit` and `log`) to give you access to the corresponding Stan models from the poster. +::: {.callout-warning} + +Due to [this issue](https://github.com/nsiccha/StanBlocks.jl/issues/38), the below Stan codes should not actually compile as they are. + +I think removing the offending UDF definitions should make compilation work. + +::: + ```{julia} using StanBlocks, QuartoComponents +import StanBlocks.stan: logit + @deffun begin "Needed for cross validation" my_bernoulli_lpmfs(y::int[n], args...) = jbroadcasted(my_bernoulli_lpmfs, y, args...) @@ -308,12 +319,13 @@ bases = (; theta ~ regression(;link_f, t) end), regression_mix=base_model(quote - theta ~ regression(;link_f, t) + theta ~ regression_mix(;link_f, t) y ~ bernoulli(theta) end) ) -link_fs = (;logit, log) +link_fs = (;logit, log) +# `posteriors` will be a (nested) named tuple - accessing e.g. the `Hetero (log)` model works via `posteriors.hetero.log` posteriors = map(bases) do base map(link_fs) do link_f base(;link_f)